Mercurial > hg > graal-compiler
diff graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame @ 7292:213c1297a814
Simple Language: A simple dynamic programming language to demonstrate Truffle features
author | Christian Wimmer <christian.wimmer@oracle.com> |
---|---|
date | Fri, 21 Dec 2012 10:45:37 -0800 |
parents | |
children | dd1b2da27b38 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame Fri Dec 21 10:45:37 2012 -0800 @@ -0,0 +1,512 @@ +/*------------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +ported from C# to Java by Wolfgang Ahorner +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +------------------------------------------------------------------------*/ +-->begin +import java.io.InputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.Map; +import java.util.HashMap; + +// Checkstyle: stop +class Token { + + public int kind; // token kind + public int pos; // token position in bytes in the source text (starting at 0) + public int charPos; // token position in characters in the source text (starting at 0) + public int col; // token column (starting at 1) + public int line; // token line (starting at 1) + public String val; // token value + public Token next; // ML 2005-03-11 Peek tokens are kept in linked list +} + +// ----------------------------------------------------------------------------------- +// Buffer +// ----------------------------------------------------------------------------------- +class Buffer { + + // This Buffer supports the following cases: + // 1) seekable stream (file) + // a) whole stream in buffer + // b) part of stream in buffer + // 2) non seekable stream (network, console) + + public static final int EOF = Character.MAX_VALUE + 1; + private static final int MIN_BUFFER_LENGTH = 1024; // 1KB + private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB + private byte[] buf; // input buffer + private int bufStart; // position of first byte in buffer relative to input stream + private int bufLen; // length of buffer + private int fileLen; // length of input stream (may change if stream is no file) + private int bufPos; // current position in buffer + private RandomAccessFile file; // input stream (seekable) + private InputStream stream; // growing input stream (e.g.: console, network) + + public Buffer(InputStream s) { + stream = s; + fileLen = bufLen = bufStart = bufPos = 0; + buf = new byte[MIN_BUFFER_LENGTH]; + } + + public Buffer(String fileName) { + try { + file = new RandomAccessFile(fileName, "r"); + fileLen = (int) file.length(); + bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH); + buf = new byte[bufLen]; + bufStart = Integer.MAX_VALUE; // nothing in buffer so far + if (fileLen > 0) + setPos(0); // setup buffer to position 0 (start) + else + bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid + if (bufLen == fileLen) + Close(); + } catch (IOException e) { + throw new FatalError("Could not open file " + fileName); + } + } + + // don't use b after this call anymore + // called in UTF8Buffer constructor + protected Buffer(Buffer b) { + buf = b.buf; + bufStart = b.bufStart; + bufLen = b.bufLen; + fileLen = b.fileLen; + bufPos = b.bufPos; + file = b.file; + stream = b.stream; + // keep finalize from closing the file + b.file = null; + } + + @Override + protected void finalize() throws Throwable { + super.finalize(); + Close(); + } + + protected void Close() { + if (file != null) { + try { + file.close(); + file = null; + } catch (IOException e) { + throw new FatalError(e.getMessage()); + } + } + } + + public int Read() { + if (bufPos < bufLen) { + return buf[bufPos++] & 0xff; // mask out sign bits + } else if (getPos() < fileLen) { + setPos(getPos()); // shift buffer start to pos + return buf[bufPos++] & 0xff; // mask out sign bits + } else if (stream != null && ReadNextStreamChunk() > 0) { + return buf[bufPos++] & 0xff; // mask out sign bits + } else { + return EOF; + } + } + + public int Peek() { + int curPos = getPos(); + int ch = Read(); + setPos(curPos); + return ch; + } + + // beg .. begin, zero-based, inclusive, in byte + // end .. end, zero-based, exclusive, in byte + public String GetString(int beg, int end) { + int len = 0; + char[] buffer = new char[end - beg]; + int oldPos = getPos(); + setPos(beg); + while (getPos() < end) + buffer[len++] = (char) Read(); + setPos(oldPos); + return new String(buffer, 0, len); + } + + public int getPos() { + return bufPos + bufStart; + } + + public void setPos(int value) { + if (value >= fileLen && stream != null) { + // Wanted position is after buffer and the stream + // is not seek-able e.g. network or console, + // thus we have to read the stream manually till + // the wanted position is in sight. + while (value >= fileLen && ReadNextStreamChunk() > 0) { + // nothing to do... + } + } + + if (value < 0 || value > fileLen) { + throw new FatalError("buffer out of bounds access, position: " + value); + } + + if (value >= bufStart && value < bufStart + bufLen) { // already in buffer + bufPos = value - bufStart; + } else if (file != null) { // must be swapped in + try { + file.seek(value); + bufLen = file.read(buf); + bufStart = value; + bufPos = 0; + } catch (IOException e) { + throw new FatalError(e.getMessage()); + } + } else { + // set the position to the end of the file, Pos will return fileLen. + bufPos = fileLen - bufStart; + } + } + + // Read the next chunk of bytes from the stream, increases the buffer + // if needed and updates the fields fileLen and bufLen. + // Returns the number of bytes read. + private int ReadNextStreamChunk() { + int free = buf.length - bufLen; + if (free == 0) { + // in the case of a growing input stream + // we can neither seek in the stream, nor can we + // foresee the maximum length, thus we must adapt + // the buffer size on demand. + byte[] newBuf = new byte[bufLen * 2]; + System.arraycopy(buf, 0, newBuf, 0, bufLen); + buf = newBuf; + free = bufLen; + } + + int read; + try { + read = stream.read(buf, bufLen, free); + } catch (IOException ioex) { + throw new FatalError(ioex.getMessage()); + } + + if (read > 0) { + fileLen = bufLen = (bufLen + read); + return read; + } + // end of stream reached + return 0; + } +} + +// ----------------------------------------------------------------------------------- +// UTF8Buffer +// ----------------------------------------------------------------------------------- +class UTF8Buffer extends Buffer { + + UTF8Buffer(Buffer b) { + super(b); + } + + @Override + public int Read() { + int ch; + do { + ch = super.Read(); + // until we find a utf8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); + if (ch < 128 || ch == EOF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; + ch = super.Read(); + int c2 = ch & 0x3F; + ch = super.Read(); + int c3 = ch & 0x3F; + ch = super.Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; + ch = super.Read(); + int c2 = ch & 0x3F; + ch = super.Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; + ch = super.Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; + } +} + +// ----------------------------------------------------------------------------------- +// StartStates -- maps characters to start states of tokens +// ----------------------------------------------------------------------------------- +class StartStates { + + private static class Elem { + + public int key, val; + public Elem next; + + public Elem(int key, int val) { + this.key = key; + this.val = val; + } + } + + private Elem[] tab = new Elem[128]; + + public void set(int key, int val) { + Elem e = new Elem(key, val); + int k = key % 128; + e.next = tab[k]; + tab[k] = e; + } + + public int state(int key) { + Elem e = tab[key % 128]; + while (e != null && e.key != key) + e = e.next; + return e == null ? 0 : e.val; + } +} + +// ----------------------------------------------------------------------------------- +// Scanner +// ----------------------------------------------------------------------------------- +@SuppressWarnings({"rawtypes", "unchecked"}) +public class Scanner { + + static final char EOL = '\n'; + static final int eofSym = 0; +-->declarations + + public Buffer buffer; // scanner buffer + + Token t; // current token + int ch; // current input character + int pos; // byte position of current character + int charPos; // position by unicode characters starting with 0 + int col; // column number of current character + int line; // line number of current character + int oldEols; // EOLs that appeared in a comment; + static final StartStates start; // maps initial token character to start state + static final Map literals; // maps literal strings to literal kinds + + Token tokens; // list of tokens already peeked (first token is a dummy) + Token pt; // current peek token + + char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged + int tlen; // length of current token + + static { + start = new StartStates(); + literals = new HashMap(); +-->initialization + } + + public Scanner(String fileName) { + buffer = new Buffer(fileName); + Init(); + } + + public Scanner(InputStream s) { + buffer = new Buffer(s); + Init(); + } + + void Init() { + pos = -1; + line = 1; + col = 0; + charPos = -1; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); + int ch1 = ch; + NextCh(); + int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + throw new FatalError("Illegal byte order mark at start of file"); + } + buffer = new UTF8Buffer(buffer); + col = 0; + charPos = -1; + NextCh(); + } + pt = tokens = new Token(); // first token is a dummy + } + + void NextCh() { + if (oldEols > 0) { + ch = EOL; + oldEols--; + } else { + pos = buffer.getPos(); + // buffer reads unicode chars, if UTF8 has been detected + ch = buffer.Read(); + col++; + charPos++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == '\r' && buffer.Peek() != '\n') + ch = EOL; + if (ch == EOL) { + line++; + col = 0; + } + } +-->casing + } + + void AddCh() { + if (tlen >= tval.length) { + char[] newBuf = new char[2 * tval.length]; + System.arraycopy(tval, 0, newBuf, 0, tval.length); + tval = newBuf; + } + if (ch != Buffer.EOF) { +-->casing2 + NextCh(); + } + } + +-->comments + + void CheckLiteral() { + String val = t.val; +-->casing3 + Object kind = literals.get(val); + if (kind != null) { + t.kind = ((Integer) kind).intValue(); + } + } + + Token NextToken() { + while (ch == ' ' || +-->scan1 + ) NextCh(); +-->scan2 + int recKind = noSym; + int recEnd = pos; + t = new Token(); + t.pos = pos; + t.col = col; + t.line = line; + t.charPos = charPos; + int state = start.state(ch); + tlen = 0; + AddCh(); + + loop: for (;;) { + switch (state) { + case -1: { + t.kind = eofSym; + break loop; + } // NextCh already done + case 0: { + if (recKind != noSym) { + tlen = recEnd - t.pos; + SetScannerBehindT(); + } + t.kind = recKind; + break loop; + } // NextCh already done +-->scan3 + } + } + t.val = new String(tval, 0, tlen); + return t; + } + + private void SetScannerBehindT() { + buffer.setPos(t.pos); + NextCh(); + line = t.line; + col = t.col; + charPos = t.charPos; + for (int i = 0; i < tlen; i++) + NextCh(); + } + + // get the next token (possibly a token already seen during peeking) + public Token Scan() { + if (tokens.next == null) { + return NextToken(); + } else { + pt = tokens = tokens.next; + return tokens; + } + } + + // get the next token, ignore pragmas + public Token Peek() { + do { + if (pt.next == null) { + pt.next = NextToken(); + } + pt = pt.next; + } while (pt.kind > maxT); // skip pragmas + + return pt; + } + + // make sure that peeking starts at current scan position + public void ResetPeek() { + pt = tokens; + } + + // The following methods are used for the CLNG Editor and will be called with java.Reflection. + // If the editor won't be used these 3 functions are obsolete, + // otherwise changes within the signature of the methods will result in Syntax Highlighting not working properly +// anymore. + + // get the offset of the next Token + public int getPeekTokenOffset() { + return pt.pos; + } + + // get the String value of the Token + public String getPeekTokenVal() { + return pt.val; + } + + // get the Kind value of the Token + public int getPeekTokenKind() { + return pt.kind; + } + +} // end Scanner