changeset 17066:0bcefb0f8488

Truffle: byte[] sources.
author Chris Seaton <chris.seaton@oracle.com>
date Mon, 08 Sep 2014 22:21:21 +0100
parents a6277ae87f0e
children 2bc092f3d574
files graal/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/source/BytesSourceSectionTest.java graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/BytesDecoder.java graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java
diffstat 3 files changed, 302 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/source/BytesSourceSectionTest.java	Mon Sep 08 22:21:21 2014 +0100
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.api.test.source;
+
+import static org.junit.Assert.*;
+
+import java.nio.charset.*;
+
+import org.junit.*;
+
+import com.oracle.truffle.api.source.*;
+
+public class BytesSourceSectionTest {
+    // Sections of a Source.fromBytes source are addressed by byte offset and byte length.
+    @Test
+    public void testSectionsFromLineNumberASCII() {
+        final byte[] bytes = "foo\nbar\nbaz\n".getBytes(StandardCharsets.UTF_8);
+        final Source source = Source.fromBytes(bytes, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertEquals("foo", source.createSection("identifier", 1).getCode());
+        assertEquals("bar", source.createSection("identifier", 2).getCode());
+        assertEquals("baz", source.createSection("identifier", 3).getCode());
+    }
+
+    @Test
+    public void testSectionsFromOffsetsASCII() {
+        final byte[] bytes = "foo\nbar\nbaz\n".getBytes(StandardCharsets.UTF_8);
+        final Source source = Source.fromBytes(bytes, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertEquals("foo", source.createSection("identifier", 0, 3).getCode());
+        assertEquals("bar", source.createSection("identifier", 4, 3).getCode());
+        assertEquals("baz", source.createSection("identifier", 8, 3).getCode());
+    }
+
+    @Test
+    public void testSectionsFromLineNumberUTF8() {
+        // U+2603 (snowman) takes three bytes in UTF-8; escaped to be independent of -encoding
+        final byte[] bytes = "foo\n\u2603\nbaz\n".getBytes(StandardCharsets.UTF_8);
+        final Source source = Source.fromBytes(bytes, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertEquals("foo", source.createSection("identifier", 1).getCode());
+        assertEquals("\u2603", source.createSection("identifier", 2).getCode());
+        assertEquals("baz", source.createSection("identifier", 3).getCode());
+    }
+
+    @Test
+    public void testSectionsFromOffsetsUTF8() {
+        // U+2603 (snowman) takes three bytes in UTF-8; escaped to be independent of -encoding
+        final byte[] bytes = "foo\n\u2603\nbaz\n".getBytes(StandardCharsets.UTF_8);
+        final Source source = Source.fromBytes(bytes, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertEquals("foo", source.createSection("identifier", 0, 3).getCode());
+        assertEquals("\u2603", source.createSection("identifier", 4, 3).getCode());
+        assertEquals("baz", source.createSection("identifier", 8, 3).getCode());
+    }
+
+    @Test
+    public void testOffset() {
+        final byte[] bytes = "xxxfoo\nbar\nbaz\nxxx".getBytes(StandardCharsets.UTF_8);
+        final Source source = Source.fromBytes(bytes, 3, bytes.length - 6, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertEquals("foo", source.createSection("identifier", 0, 3).getCode());
+        assertEquals("bar", source.createSection("identifier", 4, 3).getCode());
+        assertEquals("baz", source.createSection("identifier", 8, 3).getCode());
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/BytesDecoder.java	Mon Sep 08 22:21:21 2014 +0100
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.api.source;
+
+import java.nio.charset.*;
+import java.util.*;
+
+/**
+ * For a language where strings do not map into Java strings, provides utilities to find line
+ * endings and to decode raw bytes into an approximate representation for tools to display.
+ * <p>
+ * See {@link Source#fromBytes}.
+ */
+public interface BytesDecoder {
+    /** Decodes {@code length} bytes, starting at {@code byteIndex}, into a displayable string. */
+    String decode(byte[] bytes, int byteIndex, int length);
+    /** Calls {@code lineMarker} with the offset after each newline, relative to byteIndex. */
+    void decodeLines(byte[] bytes, int byteIndex, int length, LineMarker lineMarker);
+
+    public interface LineMarker {
+        /** Marks a line start at byte offset {@code index} from the start of the decoded range. */
+        void markLine(int index);
+
+    }
+
+    public static class UTF8BytesDecoder implements BytesDecoder {
+        // Decodes text as UTF-8 and treats every '\n' byte as the end of a line.
+        @Override
+        public String decode(byte[] bytes, int byteIndex, int length) {
+            return new String(Arrays.copyOfRange(bytes, byteIndex, byteIndex + length), StandardCharsets.UTF_8);
+        }
+
+        @Override
+        public void decodeLines(byte[] bytes, int byteIndex, int length, LineMarker lineMarker) {
+            for (int n = byteIndex; n < byteIndex + length; n++) {
+                if (bytes[n] == '\n') {
+                    lineMarker.markLine(n - byteIndex + 1); // relative to region start, not to the array
+                }
+            }
+        }
+
+    }
+
+}
--- a/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java	Mon Sep 08 13:49:40 2014 +0200
+++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java	Mon Sep 08 22:21:21 2014 +0100
@@ -158,6 +158,37 @@
     }
 
     /**
+     * Creates a source from all of {@code bytes}. This can be used if the encoding of strings in
+     * your language is not compatible with Java strings, or if your parser returns byte indices
+     * instead of character indices. The returned source is then indexed by byte, not by character.
+     *
+     * @param bytes the raw bytes of the source, used from index 0 through {@code bytes.length}
+     * @param description a note about the origin; also returned as the source's name and path
+     * @param decoder how to decode the bytes into Java strings and to locate line starts
+     * @return a newly created, non-indexed source representation
+     */
+    public static Source fromBytes(byte[] bytes, String description, BytesDecoder decoder) {
+        return fromBytes(bytes, 0, bytes.length, description, decoder);
+    }
+
+    /**
+     * Creates a source from raw bytes. This can be used if the encoding of strings in your language
+     * is not compatible with Java strings, or if your parser returns byte indices instead of
+     * character indices. The returned source is then indexed by byte, not by character. Offsets are
+     * relative to byteIndex. The array is retained rather than copied, and decoded on demand.
+     *
+     * @param bytes the raw bytes of the source
+     * @param byteIndex where the string starts in the byte array
+     * @param length the length of the string in the byte array, counted in bytes
+     * @param description a note about the origin; also returned as the source's name and path
+     * @param decoder how to decode the bytes into Java strings and to locate line starts
+     * @return a newly created, non-indexed source representation
+     */
+    public static Source fromBytes(byte[] bytes, int byteIndex, int length, String description, BytesDecoder decoder) {
+        return new BytesSource(description, bytes, byteIndex, length, decoder);
+    }
+
+    /**
      * Creates a source from literal text, but which acts as a file and can be retrieved by name
      * (unlike other literal sources); intended for testing.
      *
@@ -246,6 +277,10 @@
      */
     public abstract String getCode();
 
+    public String getCode(int charIndex, int charLength) { // text in [charIndex, charIndex + charLength)
+        return getCode().substring(charIndex, charIndex + charLength); // default: slice of the full text
+    }
+
     /**
      * Gets the text (not including a possible terminating newline) in a (1-based) numbered line.
      */
@@ -368,10 +403,7 @@
      * @throws IllegalStateException if the source is one of the "null" instances
      */
     public final SourceSection createSection(String identifier, int charIndex, int length) throws IllegalArgumentException {
-        final int codeLength = getCode().length();
-        if (!(charIndex >= 0 && length >= 0 && charIndex + length <= codeLength)) {
-            throw new IllegalArgumentException("text positions out of range");
-        }
+        checkRange(charIndex, length);
         checkTextMap();
         final int startLine = getLineNumber(charIndex);
         final int startColumn = charIndex - getLineStartOffset(startLine) + 1;
@@ -379,6 +411,12 @@
         return new DefaultSourceSection(this, identifier, startLine, startColumn, charIndex, length);
     }
 
+    protected void checkRange(int charIndex, int length) { // rejects ranges outside the source text
+        if (charIndex < 0 || length < 0 || (long) charIndex + length > getCode().length()) { // long sum: an int sum could overflow and pass
+            throw new IllegalArgumentException("text positions out of range");
+        }
+    }
+
     /**
      * Creates a representation of a line of text in the source identified only by line number, from
      * which the character information will be computed.
@@ -409,15 +447,19 @@
 
     private TextMap checkTextMap() {
         if (textMap == null) {
-            final String code = getCode();
-            if (code == null) {
-                throw new RuntimeException("can't read file " + getName());
-            }
-            textMap = new TextMap(code);
+            textMap = createTextMap(); // built lazily when no map is cached; the factory is overridable
         }
         return textMap;
     }
 
+    protected TextMap createTextMap() { // default factory: maps lines over the text from getCode()
+        final String text = getCode();
+        if (text != null) {
+            return TextMap.fromString(text);
+        }
+        throw new RuntimeException("can't read file " + getName());
+    }
+
     private static final class LiteralSource extends Source {
 
         private final String name; // Name used originally to describe the source
@@ -621,6 +663,74 @@
 
     }
 
+    private static final class BytesSource extends Source {
+        // Source over bytes[byteIndex, byteIndex + length); text is decoded only on request.
+        private final String name;
+        private final byte[] bytes; // retained as passed in, not copied
+        private final int byteIndex; // first byte of the source within bytes
+        private final int length; // number of bytes that belong to the source
+        private final BytesDecoder decoder;
+
+        public BytesSource(String name, byte[] bytes, int byteIndex, int length, BytesDecoder decoder) {
+            this.name = name;
+            this.bytes = bytes;
+            this.byteIndex = byteIndex;
+            this.length = length;
+            this.decoder = decoder;
+        }
+
+        @Override
+        protected void reset() { // all fields of this class are final; nothing of its own to reset
+        }
+
+        @Override
+        public String getName() {
+            return name;
+        }
+
+        @Override
+        public String getShortName() {
+            return name;
+        }
+
+        @Override
+        public String getPath() {
+            return name;
+        }
+
+        @Override
+        public URL getURL() {
+            return null; // no URL is associated with a byte source
+        }
+
+        @Override
+        public Reader getReader() {
+            return null; // no Reader is offered for a byte source
+        }
+
+        @Override
+        public String getCode() {
+            return decoder.decode(bytes, byteIndex, length);
+        }
+
+        @Override
+        public String getCode(int byteOffset, int codeLength) { // byteOffset is relative to byteIndex
+            return decoder.decode(bytes, byteIndex + byteOffset, codeLength); // decodes only the requested range
+        }
+
+        @Override
+        protected void checkRange(int charIndex, int rangeLength) { // positions here are byte offsets
+            if (charIndex < 0 || rangeLength < 0 || (long) charIndex + rangeLength > length) { // long sum: an int sum could overflow and pass
+                throw new IllegalArgumentException("text positions out of range");
+            }
+        }
+
+        @Override
+        protected TextMap createTextMap() {
+            return TextMap.fromBytes(bytes, byteIndex, length, decoder);
+        }
+    }
+
     private static final class DefaultSourceSection implements SourceSection {
 
         private final Source source;
@@ -704,7 +814,7 @@
 
         @Override
         public final String getCode() {
-            return getSource().getCode().substring(charIndex, charIndex + charLength);
+            return getSource().getCode(charIndex, charLength); // the source decides how to extract the range
         }
 
         @Override
@@ -866,12 +976,18 @@
         // Is the final text character a newline?
         final boolean finalNL;
 
+        public TextMap(int[] nlOffsets, int textLength, boolean finalNL) { // stores precomputed values as given
+            this.finalNL = finalNL;
+            this.textLength = textLength;
+            this.nlOffsets = nlOffsets;
+        }
+
         /**
          * Constructs map permitting translation between 0-based character offsets and 1-based
          * lines/columns.
          */
-        public TextMap(String text) {
-            this.textLength = text.length();
+        public static TextMap fromString(String text) {
+            final int textLength = text.length();
             final ArrayList<Integer> lines = new ArrayList<>();
             lines.add(0);
             int offset = 0;
@@ -887,12 +1003,37 @@
             }
             lines.add(Integer.MAX_VALUE);
 
-            nlOffsets = new int[lines.size()];
+            final int[] nlOffsets = new int[lines.size()];
             for (int line = 0; line < lines.size(); line++) {
                 nlOffsets[line] = lines.get(line);
             }
 
-            finalNL = textLength > 0 && (textLength == nlOffsets[nlOffsets.length - 2]);
+            final boolean finalNL = textLength > 0 && (textLength == nlOffsets[nlOffsets.length - 2]);
+
+            return new TextMap(nlOffsets, textLength, finalNL);
+        }
+
+        public static TextMap fromBytes(byte[] bytes, int byteIndex, int length, BytesDecoder bytesDecoder) {
+            // Offset 0 always starts the first line; the decoder contributes the remaining starts.
+            final ArrayList<Integer> starts = new ArrayList<>();
+            starts.add(0);
+            bytesDecoder.decodeLines(bytes, byteIndex, length, new BytesDecoder.LineMarker() {
+                public void markLine(int index) {
+                    starts.add(index);
+                }
+            });
+            // Trailing Integer.MAX_VALUE entry, as in the String-based factory.
+            starts.add(Integer.MAX_VALUE);
+
+            // Unbox the collected starts into the primitive table handed to the map.
+            final int[] offsets = new int[starts.size()];
+            int slot = 0;
+            for (int start : starts) {
+                offsets[slot++] = start;
+            }
+
+            // finalNL: the last marked line start coincides with the end of a non-empty text.
+            return new TextMap(offsets, length, length > 0 && length == offsets[offsets.length - 2]);
+        }
 
         /**