Mercurial > hg > graal-compiler

diff graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame @ 7292:213c1297a814
Simple Language: A simple dynamic programming language to demonstrate Truffle features
author: Christian Wimmer <christian.wimmer@oracle.com>
date: Fri, 21 Dec 2012 10:45:37 -0800
children: dd1b2da27b38
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame	Fri Dec 21 10:45:37 2012 -0800
@@ -0,0 +1,512 @@
+/*-------------------------------------------------------------------------
+Compiler Generator Coco/R,
+Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
+extended by M. Loeberbauer & A. Woess, Univ. of Linz
+ported from C# to Java by Wolfgang Ahorner
+with improvements by Pat Terry, Rhodes University
+
+This program is free software; you can redistribute it and/or modify it 
+under the terms of the GNU General Public License as published by the 
+Free Software Foundation; either version 2, or (at your option) any 
+later version.
+
+This program is distributed in the hope that it will be useful, but 
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+for more details.
+
+You should have received a copy of the GNU General Public License along 
+with this program; if not, write to the Free Software Foundation, Inc., 
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+As an exception, it is allowed to write an extension of Coco/R that is
+used as a plugin in non-free software.
+
+If not otherwise stated, any source code generated by Coco/R (other than 
+Coco/R itself) does not fall under the GNU General Public License.
+------------------------------------------------------------------------*/
+-->begin
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Map;
+import java.util.HashMap;
+
+// Checkstyle: stop
+class Token {
+
+    public int kind; // token kind
+    public int pos; // token position in bytes in the source text (starting at 0)
+    public int charPos; // token position in characters in the source text (starting at 0)
+    public int col; // token column (starting at 1)
+    public int line; // token line (starting at 1)
+    public String val; // token value
+    public Token next; // ML 2005-03-11 Peek tokens are kept in linked list
+}
+
+// -----------------------------------------------------------------------------------
+// Buffer
+// -----------------------------------------------------------------------------------
+class Buffer {
+
+    // This Buffer supports the following cases:
+    // 1) seekable stream (file)
+    // a) whole stream in buffer
+    // b) part of stream in buffer
+    // 2) non seekable stream (network, console)
+
+    public static final int EOF = Character.MAX_VALUE + 1;
+    private static final int MIN_BUFFER_LENGTH = 1024; // 1KB
+    private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
+    private byte[] buf; // input buffer
+    private int bufStart; // position of first byte in buffer relative to input stream
+    private int bufLen; // length of buffer
+    private int fileLen; // length of input stream (may change if stream is no file)
+    private int bufPos; // current position in buffer
+    private RandomAccessFile file; // input stream (seekable)
+    private InputStream stream; // growing input stream (e.g.: console, network)
+
+    public Buffer(InputStream s) {
+        stream = s;
+        fileLen = bufLen = bufStart = bufPos = 0;
+        buf = new byte[MIN_BUFFER_LENGTH];
+    }
+
+    public Buffer(String fileName) {
+        try {
+            file = new RandomAccessFile(fileName, "r");
+            fileLen = (int) file.length();
+            bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH);
+            buf = new byte[bufLen];
+            bufStart = Integer.MAX_VALUE; // nothing in buffer so far
+            if (fileLen > 0)
+                setPos(0); // setup buffer to position 0 (start)
+            else
+                bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid
+            if (bufLen == fileLen)
+                Close();
+        } catch (IOException e) {
+            throw new FatalError("Could not open file " + fileName);
+        }
+    }
+
+    // don't use b after this call anymore
+    // called in UTF8Buffer constructor
+    protected Buffer(Buffer b) {
+        buf = b.buf;
+        bufStart = b.bufStart;
+        bufLen = b.bufLen;
+        fileLen = b.fileLen;
+        bufPos = b.bufPos;
+        file = b.file;
+        stream = b.stream;
+        // keep finalize from closing the file
+        b.file = null;
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+        super.finalize();
+        Close();
+    }
+
+    protected void Close() {
+        if (file != null) {
+            try {
+                file.close();
+                file = null;
+            } catch (IOException e) {
+                throw new FatalError(e.getMessage());
+            }
+        }
+    }
+
+    public int Read() {
+        if (bufPos < bufLen) {
+            return buf[bufPos++] & 0xff; // mask out sign bits
+        } else if (getPos() < fileLen) {
+            setPos(getPos()); // shift buffer start to pos
+            return buf[bufPos++] & 0xff; // mask out sign bits
+        } else if (stream != null && ReadNextStreamChunk() > 0) {
+            return buf[bufPos++] & 0xff; // mask out sign bits
+        } else {
+            return EOF;
+        }
+    }
+
+    public int Peek() {
+        int curPos = getPos();
+        int ch = Read();
+        setPos(curPos);
+        return ch;
+    }
+
+    // beg .. begin, zero-based, inclusive, in byte
+    // end .. end, zero-based, exclusive, in byte
+    public String GetString(int beg, int end) {
+        int len = 0;
+        char[] buffer = new char[end - beg];
+        int oldPos = getPos();
+        setPos(beg);
+        while (getPos() < end)
+            buffer[len++] = (char) Read();
+        setPos(oldPos);
+        return new String(buffer, 0, len);
+    }
+
+    public int getPos() {
+        return bufPos + bufStart;
+    }
+
+    public void setPos(int value) {
+        if (value >= fileLen && stream != null) {
+            // Wanted position is after buffer and the stream
+            // is not seek-able e.g. network or console,
+            // thus we have to read the stream manually till
+            // the wanted position is in sight.
+            while (value >= fileLen && ReadNextStreamChunk() > 0) {
+                // nothing to do...
+            }
+        }
+
+        if (value < 0 || value > fileLen) {
+            throw new FatalError("buffer out of bounds access, position: " + value);
+        }
+
+        if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
+            bufPos = value - bufStart;
+        } else if (file != null) { // must be swapped in
+            try {
+                file.seek(value);
+                bufLen = file.read(buf);
+                bufStart = value;
+                bufPos = 0;
+            } catch (IOException e) {
+                throw new FatalError(e.getMessage());
+            }
+        } else {
+            // set the position to the end of the file, Pos will return fileLen.
+            bufPos = fileLen - bufStart;
+        }
+    }
+
+    // Read the next chunk of bytes from the stream, increases the buffer
+    // if needed and updates the fields fileLen and bufLen.
+    // Returns the number of bytes read.
+    private int ReadNextStreamChunk() {
+        int free = buf.length - bufLen;
+        if (free == 0) {
+            // in the case of a growing input stream
+            // we can neither seek in the stream, nor can we
+            // foresee the maximum length, thus we must adapt
+            // the buffer size on demand.
+            byte[] newBuf = new byte[bufLen * 2];
+            System.arraycopy(buf, 0, newBuf, 0, bufLen);
+            buf = newBuf;
+            free = bufLen;
+        }
+
+        int read;
+        try {
+            read = stream.read(buf, bufLen, free);
+        } catch (IOException ioex) {
+            throw new FatalError(ioex.getMessage());
+        }
+
+        if (read > 0) {
+            fileLen = bufLen = (bufLen + read);
+            return read;
+        }
+        // end of stream reached
+        return 0;
+    }
+}
+
+// -----------------------------------------------------------------------------------
+// UTF8Buffer
+// -----------------------------------------------------------------------------------
+class UTF8Buffer extends Buffer {
+
+    UTF8Buffer(Buffer b) {
+        super(b);
+    }
+
+    @Override
+    public int Read() {
+        int ch;
+        do {
+            ch = super.Read();
+            // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
+        } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF));
+        if (ch < 128 || ch == EOF) {
+            // nothing to do, first 127 chars are the same in ascii and utf8
+            // 0xxxxxxx or end of file character
+        } else if ((ch & 0xF0) == 0xF0) {
+            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            int c1 = ch & 0x07;
+            ch = super.Read();
+            int c2 = ch & 0x3F;
+            ch = super.Read();
+            int c3 = ch & 0x3F;
+            ch = super.Read();
+            int c4 = ch & 0x3F;
+            ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
+        } else if ((ch & 0xE0) == 0xE0) {
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            int c1 = ch & 0x0F;
+            ch = super.Read();
+            int c2 = ch & 0x3F;
+            ch = super.Read();
+            int c3 = ch & 0x3F;
+            ch = (((c1 << 6) | c2) << 6) | c3;
+        } else if ((ch & 0xC0) == 0xC0) {
+            // 110xxxxx 10xxxxxx
+            int c1 = ch & 0x1F;
+            ch = super.Read();
+            int c2 = ch & 0x3F;
+            ch = (c1 << 6) | c2;
+        }
+        return ch;
+    }
+}
+
+// -----------------------------------------------------------------------------------
+// StartStates -- maps characters to start states of tokens
+// -----------------------------------------------------------------------------------
+class StartStates {
+
+    private static class Elem {
+
+        public int key, val;
+        public Elem next;
+
+        public Elem(int key, int val) {
+            this.key = key;
+            this.val = val;
+        }
+    }
+
+    private Elem[] tab = new Elem[128];
+
+    public void set(int key, int val) {
+        Elem e = new Elem(key, val);
+        int k = key % 128;
+        e.next = tab[k];
+        tab[k] = e;
+    }
+
+    public int state(int key) {
+        Elem e = tab[key % 128];
+        while (e != null && e.key != key)
+            e = e.next;
+        return e == null ? 0 : e.val;
+    }
+}
+
+// -----------------------------------------------------------------------------------
+// Scanner
+// -----------------------------------------------------------------------------------
+@SuppressWarnings({"rawtypes", "unchecked"})
+public class Scanner {
+
+    static final char EOL = '\n';
+    static final int eofSym = 0;
+-->declarations
+
+    public Buffer buffer; // scanner buffer
+
+    Token t; // current token
+    int ch; // current input character
+    int pos; // byte position of current character
+    int charPos; // position by unicode characters starting with 0
+    int col; // column number of current character
+    int line; // line number of current character
+    int oldEols; // EOLs that appeared in a comment;
+    static final StartStates start; // maps initial token character to start state
+    static final Map literals; // maps literal strings to literal kinds
+
+    Token tokens; // list of tokens already peeked (first token is a dummy)
+    Token pt; // current peek token
+
+    char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged
+    int tlen; // length of current token
+
+    static {
+        start = new StartStates();
+        literals = new HashMap();
+-->initialization
+    }
+
+    public Scanner(String fileName) {
+        buffer = new Buffer(fileName);
+        Init();
+    }
+
+    public Scanner(InputStream s) {
+        buffer = new Buffer(s);
+        Init();
+    }
+
+    void Init() {
+        pos = -1;
+        line = 1;
+        col = 0;
+        charPos = -1;
+        oldEols = 0;
+        NextCh();
+        if (ch == 0xEF) { // check optional byte order mark for UTF-8
+            NextCh();
+            int ch1 = ch;
+            NextCh();
+            int ch2 = ch;
+            if (ch1 != 0xBB || ch2 != 0xBF) {
+                throw new FatalError("Illegal byte order mark at start of file");
+            }
+            buffer = new UTF8Buffer(buffer);
+            col = 0;
+            charPos = -1;
+            NextCh();
+        }
+        pt = tokens = new Token(); // first token is a dummy
+    }
+
+    void NextCh() {
+        if (oldEols > 0) {
+            ch = EOL;
+            oldEols--;
+        } else {
+            pos = buffer.getPos();
+            // buffer reads unicode chars, if UTF8 has been detected
+            ch = buffer.Read();
+            col++;
+            charPos++;
+            // replace isolated '\r' by '\n' in order to make
+            // eol handling uniform across Windows, Unix and Mac
+            if (ch == '\r' && buffer.Peek() != '\n')
+                ch = EOL;
+            if (ch == EOL) {
+                line++;
+                col = 0;
+            }
+        }
+-->casing
+    }
+
+    void AddCh() {
+        if (tlen >= tval.length) {
+            char[] newBuf = new char[2 * tval.length];
+            System.arraycopy(tval, 0, newBuf, 0, tval.length);
+            tval = newBuf;
+        }
+        if (ch != Buffer.EOF) {
+-->casing2
+            NextCh();
+        }
+    }
+	
+-->comments
+
+    void CheckLiteral() {
+        String val = t.val;
+-->casing3
+        Object kind = literals.get(val);
+        if (kind != null) {
+            t.kind = ((Integer) kind).intValue();
+        }
+    }
+
+    Token NextToken() {
+        while (ch == ' ' || 
+-->scan1
+		) NextCh();
+-->scan2
+        int recKind = noSym;
+        int recEnd = pos;
+        t = new Token();
+        t.pos = pos;
+        t.col = col;
+        t.line = line;
+        t.charPos = charPos;
+        int state = start.state(ch);
+        tlen = 0;
+        AddCh();
+
+        loop: for (;;) {
+            switch (state) {
+                case -1: {
+                    t.kind = eofSym;
+                    break loop;
+                } // NextCh already done
+                case 0: {
+                    if (recKind != noSym) {
+                        tlen = recEnd - t.pos;
+                        SetScannerBehindT();
+                    }
+                    t.kind = recKind;
+                    break loop;
+                } // NextCh already done
+-->scan3
+            }
+        }
+        t.val = new String(tval, 0, tlen);
+        return t;
+    }
+
+    private void SetScannerBehindT() {
+        buffer.setPos(t.pos);
+        NextCh();
+        line = t.line;
+        col = t.col;
+        charPos = t.charPos;
+        for (int i = 0; i < tlen; i++)
+            NextCh();
+    }
+
+    // get the next token (possibly a token already seen during peeking)
+    public Token Scan() {
+        if (tokens.next == null) {
+            return NextToken();
+        } else {
+            pt = tokens = tokens.next;
+            return tokens;
+        }
+    }
+
+    // get the next token, ignore pragmas
+    public Token Peek() {
+        do {
+            if (pt.next == null) {
+                pt.next = NextToken();
+            }
+            pt = pt.next;
+        } while (pt.kind > maxT); // skip pragmas
+
+        return pt;
+    }
+
+    // make sure that peeking starts at current scan position
+    public void ResetPeek() {
+        pt = tokens;
+    }
+
+    // The following methods are used for the CLNG Editor and will be called with java.Reflection.
+    // If the editor won't be used these 3 functions are obsolete,
+    // otherwise changes within the signature of the methods will result in Syntax Highlighting not working properly
+// anymore.
+
+    // get the offset of the next Token
+    public int getPeekTokenOffset() {
+        return pt.pos;
+    }
+
+    // get the String value of the Token
+    public String getPeekTokenVal() {
+        return pt.val;
+    }
+
+    // get the Kind value of the Token
+    public int getPeekTokenKind() {
+        return pt.kind;
+    }
+
+} // end Scanner
author	Christian Wimmer <christian.wimmer@oracle.com>
date	Fri, 21 Dec 2012 10:45:37 -0800
parents
children	dd1b2da27b38