comparison graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame @ 7292:213c1297a814

Simple Language: A simple dynamic programming language to demonstrate Truffle features
author Christian Wimmer <christian.wimmer@oracle.com>
date Fri, 21 Dec 2012 10:45:37 -0800
parents
children dd1b2da27b38
comparison
equal deleted inserted replaced
7291:a748e4d44694 7292:213c1297a814
1 /*-------------------------------------------------------------------------
2 Compiler Generator Coco/R,
3 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
4 extended by M. Loeberbauer & A. Woess, Univ. of Linz
5 ported from C# to Java by Wolfgang Ahorner
6 with improvements by Pat Terry, Rhodes University
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
22 As an exception, it is allowed to write an extension of Coco/R that is
23 used as a plugin in non-free software.
24
25 If not otherwise stated, any source code generated by Coco/R (other than
26 Coco/R itself) does not fall under the GNU General Public License.
27 ------------------------------------------------------------------------*/
28 -->begin
29 import java.io.InputStream;
30 import java.io.IOException;
31 import java.io.RandomAccessFile;
32 import java.util.Map;
33 import java.util.HashMap;
34
35 // Checkstyle: stop
/**
 * A token produced by the scanner.
 *
 * <p>Plain data holder: every field is public and written directly by the scanner.
 */
class Token {

    /** Token kind. */
    public int kind;

    /** Token position in bytes in the source text (starting at 0). */
    public int pos;

    /** Token position in characters in the source text (starting at 0). */
    public int charPos;

    /** Token column (starting at 1). */
    public int col;

    /** Token line (starting at 1). */
    public int line;

    /** Token value. */
    public String val;

    /** Next token; peeked tokens are kept in a linked list (ML 2005-03-11). */
    public Token next;
}
46
47 // -----------------------------------------------------------------------------------
48 // Buffer
49 // -----------------------------------------------------------------------------------
50 class Buffer {
51
52 // This Buffer supports the following cases:
53 // 1) seekable stream (file)
54 // a) whole stream in buffer
55 // b) part of stream in buffer
56 // 2) non seekable stream (network, console)
57
58 public static final int EOF = Character.MAX_VALUE + 1;
59 private static final int MIN_BUFFER_LENGTH = 1024; // 1KB
60 private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
61 private byte[] buf; // input buffer
62 private int bufStart; // position of first byte in buffer relative to input stream
63 private int bufLen; // length of buffer
64 private int fileLen; // length of input stream (may change if stream is no file)
65 private int bufPos; // current position in buffer
66 private RandomAccessFile file; // input stream (seekable)
67 private InputStream stream; // growing input stream (e.g.: console, network)
68
69 public Buffer(InputStream s) {
70 stream = s;
71 fileLen = bufLen = bufStart = bufPos = 0;
72 buf = new byte[MIN_BUFFER_LENGTH];
73 }
74
75 public Buffer(String fileName) {
76 try {
77 file = new RandomAccessFile(fileName, "r");
78 fileLen = (int) file.length();
79 bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH);
80 buf = new byte[bufLen];
81 bufStart = Integer.MAX_VALUE; // nothing in buffer so far
82 if (fileLen > 0)
83 setPos(0); // setup buffer to position 0 (start)
84 else
85 bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid
86 if (bufLen == fileLen)
87 Close();
88 } catch (IOException e) {
89 throw new FatalError("Could not open file " + fileName);
90 }
91 }
92
93 // don't use b after this call anymore
94 // called in UTF8Buffer constructor
95 protected Buffer(Buffer b) {
96 buf = b.buf;
97 bufStart = b.bufStart;
98 bufLen = b.bufLen;
99 fileLen = b.fileLen;
100 bufPos = b.bufPos;
101 file = b.file;
102 stream = b.stream;
103 // keep finalize from closing the file
104 b.file = null;
105 }
106
107 @Override
108 protected void finalize() throws Throwable {
109 super.finalize();
110 Close();
111 }
112
113 protected void Close() {
114 if (file != null) {
115 try {
116 file.close();
117 file = null;
118 } catch (IOException e) {
119 throw new FatalError(e.getMessage());
120 }
121 }
122 }
123
124 public int Read() {
125 if (bufPos < bufLen) {
126 return buf[bufPos++] & 0xff; // mask out sign bits
127 } else if (getPos() < fileLen) {
128 setPos(getPos()); // shift buffer start to pos
129 return buf[bufPos++] & 0xff; // mask out sign bits
130 } else if (stream != null && ReadNextStreamChunk() > 0) {
131 return buf[bufPos++] & 0xff; // mask out sign bits
132 } else {
133 return EOF;
134 }
135 }
136
137 public int Peek() {
138 int curPos = getPos();
139 int ch = Read();
140 setPos(curPos);
141 return ch;
142 }
143
144 // beg .. begin, zero-based, inclusive, in byte
145 // end .. end, zero-based, exclusive, in byte
146 public String GetString(int beg, int end) {
147 int len = 0;
148 char[] buffer = new char[end - beg];
149 int oldPos = getPos();
150 setPos(beg);
151 while (getPos() < end)
152 buffer[len++] = (char) Read();
153 setPos(oldPos);
154 return new String(buffer, 0, len);
155 }
156
157 public int getPos() {
158 return bufPos + bufStart;
159 }
160
161 public void setPos(int value) {
162 if (value >= fileLen && stream != null) {
163 // Wanted position is after buffer and the stream
164 // is not seek-able e.g. network or console,
165 // thus we have to read the stream manually till
166 // the wanted position is in sight.
167 while (value >= fileLen && ReadNextStreamChunk() > 0) {
168 // nothing to do...
169 }
170 }
171
172 if (value < 0 || value > fileLen) {
173 throw new FatalError("buffer out of bounds access, position: " + value);
174 }
175
176 if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
177 bufPos = value - bufStart;
178 } else if (file != null) { // must be swapped in
179 try {
180 file.seek(value);
181 bufLen = file.read(buf);
182 bufStart = value;
183 bufPos = 0;
184 } catch (IOException e) {
185 throw new FatalError(e.getMessage());
186 }
187 } else {
188 // set the position to the end of the file, Pos will return fileLen.
189 bufPos = fileLen - bufStart;
190 }
191 }
192
193 // Read the next chunk of bytes from the stream, increases the buffer
194 // if needed and updates the fields fileLen and bufLen.
195 // Returns the number of bytes read.
196 private int ReadNextStreamChunk() {
197 int free = buf.length - bufLen;
198 if (free == 0) {
199 // in the case of a growing input stream
200 // we can neither seek in the stream, nor can we
201 // foresee the maximum length, thus we must adapt
202 // the buffer size on demand.
203 byte[] newBuf = new byte[bufLen * 2];
204 System.arraycopy(buf, 0, newBuf, 0, bufLen);
205 buf = newBuf;
206 free = bufLen;
207 }
208
209 int read;
210 try {
211 read = stream.read(buf, bufLen, free);
212 } catch (IOException ioex) {
213 throw new FatalError(ioex.getMessage());
214 }
215
216 if (read > 0) {
217 fileLen = bufLen = (bufLen + read);
218 return read;
219 }
220 // end of stream reached
221 return 0;
222 }
223 }
224
225 // -----------------------------------------------------------------------------------
226 // UTF8Buffer
227 // -----------------------------------------------------------------------------------
228 class UTF8Buffer extends Buffer {
229
230 UTF8Buffer(Buffer b) {
231 super(b);
232 }
233
234 @Override
235 public int Read() {
236 int ch;
237 do {
238 ch = super.Read();
239 // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
240 } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF));
241 if (ch < 128 || ch == EOF) {
242 // nothing to do, first 127 chars are the same in ascii and utf8
243 // 0xxxxxxx or end of file character
244 } else if ((ch & 0xF0) == 0xF0) {
245 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
246 int c1 = ch & 0x07;
247 ch = super.Read();
248 int c2 = ch & 0x3F;
249 ch = super.Read();
250 int c3 = ch & 0x3F;
251 ch = super.Read();
252 int c4 = ch & 0x3F;
253 ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
254 } else if ((ch & 0xE0) == 0xE0) {
255 // 1110xxxx 10xxxxxx 10xxxxxx
256 int c1 = ch & 0x0F;
257 ch = super.Read();
258 int c2 = ch & 0x3F;
259 ch = super.Read();
260 int c3 = ch & 0x3F;
261 ch = (((c1 << 6) | c2) << 6) | c3;
262 } else if ((ch & 0xC0) == 0xC0) {
263 // 110xxxxx 10xxxxxx
264 int c1 = ch & 0x1F;
265 ch = super.Read();
266 int c2 = ch & 0x3F;
267 ch = (c1 << 6) | c2;
268 }
269 return ch;
270 }
271 }
272
273 // -----------------------------------------------------------------------------------
274 // StartStates -- maps characters to start states of tokens
275 // -----------------------------------------------------------------------------------
/**
 * Maps characters to start states of tokens.
 *
 * <p>A small chained hash table with a fixed number of buckets. Any {@code int} key is
 * accepted, including negative ones.
 */
class StartStates {

    /** Number of buckets; a power of two so that masking selects a bucket. */
    private static final int TABLE_SIZE = 128;

    /** One key/value entry in a bucket's chain. */
    private static class Elem {

        final int key;
        final int val;
        Elem next;

        Elem(int key, int val) {
            this.key = key;
            this.val = val;
        }
    }

    private final Elem[] tab = new Elem[TABLE_SIZE];

    /**
     * Returns the bucket index for a key. Equal to {@code key % 128} for non-negative
     * keys, but stays within the table for negative keys too, where {@code %} would
     * yield a negative index.
     */
    private static int bucket(int key) {
        return key & (TABLE_SIZE - 1);
    }

    /**
     * Associates a state with a key. An earlier entry for the same key is shadowed.
     *
     * @param key the character (or other key) to register
     * @param val the start state for {@code key}
     */
    public void set(int key, int val) {
        Elem e = new Elem(key, val);
        int k = bucket(key);
        e.next = tab[k]; // prepend, so the most recent entry for a key is found first
        tab[k] = e;
    }

    /**
     * Looks up the state registered for a key.
     *
     * @param key the key to look up
     * @return the most recently registered state for {@code key}, or 0 if there is none
     */
    public int state(int key) {
        for (Elem e = tab[bucket(key)]; e != null; e = e.next) {
            if (e.key == key) {
                return e.val;
            }
        }
        return 0;
    }
}
305
306 // -----------------------------------------------------------------------------------
307 // Scanner
308 // -----------------------------------------------------------------------------------
/**
 * Scanner template: turns the characters delivered by a Buffer into Token objects.
 *
 * NOTE(review): the lines in this class that begin with an arrow are frame markers,
 * presumably replaced with generated code by the compiler generator named in the file
 * header - confirm. The names noSym and maxT used below are not declared in the visible
 * text; presumably they come from the declarations marker - confirm.
 */
309 @SuppressWarnings({"rawtypes", "unchecked"})
310 public class Scanner {
311
312 static final char EOL = '\n';
313 static final int eofSym = 0;
314 -->declarations
315
316 public Buffer buffer; // scanner buffer
317
318 Token t; // current token
319 int ch; // current input character
320 int pos; // byte position of current character
321 int charPos; // position by unicode characters starting with 0
322 int col; // column number of current character
323 int line; // line number of current character
324 int oldEols; // EOLs that appeared in a comment;
325 static final StartStates start; // maps initial token character to start state
326 static final Map literals; // maps literal strings to literal kinds
327
328 Token tokens; // list of tokens already peeked (first token is a dummy)
329 Token pt; // current peek token
330
331 char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged
332 int tlen; // length of current token
333
// Creates the (initially empty) start-state table and literal map shared by all scanners.
334 static {
335 start = new StartStates();
336 literals = new HashMap();
337 -->initialization
338 }
339
/** Creates a scanner that reads from the named file. */
340 public Scanner(String fileName) {
341 buffer = new Buffer(fileName);
342 Init();
343 }
344
/** Creates a scanner that reads from the given stream. */
345 public Scanner(InputStream s) {
346 buffer = new Buffer(s);
347 Init();
348 }
349
/**
 * Resets the position counters and reads the first character. If that character is
 * 0xEF, the next two must be 0xBB and 0xBF (UTF-8 byte order mark), otherwise a
 * FatalError is thrown; after a valid mark the buffer is replaced by a UTF8Buffer, the
 * column and character counters are reset and the first character is read again.
 * Finally the dummy head of the token list is created.
 */
350 void Init() {
351 pos = -1;
352 line = 1;
353 col = 0;
354 charPos = -1;
355 oldEols = 0;
356 NextCh();
357 if (ch == 0xEF) { // check optional byte order mark for UTF-8
358 NextCh();
359 int ch1 = ch;
360 NextCh();
361 int ch2 = ch;
362 if (ch1 != 0xBB || ch2 != 0xBF) {
363 throw new FatalError("Illegal byte order mark at start of file");
364 }
365 buffer = new UTF8Buffer(buffer);
366 col = 0;
367 charPos = -1;
368 NextCh();
369 }
370 pt = tokens = new Token(); // first token is a dummy
371 }
372
/**
 * Advances to the next input character and stores it in ch. While oldEols is positive,
 * an EOL is delivered and oldEols is decremented instead of reading from the buffer.
 * Otherwise pos is set to the buffer position before the read, col and charPos are
 * incremented, and on EOL the line counter is incremented and col is reset to 0.
 */
373 void NextCh() {
374 if (oldEols > 0) {
375 ch = EOL;
376 oldEols--;
377 } else {
378 pos = buffer.getPos();
379 // buffer reads unicode chars, if UTF8 has been detected
380 ch = buffer.Read();
381 col++;
382 charPos++;
383 // replace isolated '\r' by '\n' in order to make
384 // eol handling uniform across Windows, Unix and Mac
385 if (ch == '\r' && buffer.Peek() != '\n')
386 ch = EOL;
387 if (ch == EOL) {
388 line++;
389 col = 0;
390 }
391 }
392 -->casing
393 }
394
/**
 * Doubles the capacity of tval when it is full; then, unless ch is the end-of-file
 * value, runs the code at the frame marker below and advances to the next character.
 * NOTE(review): nothing visible here stores ch into tval or increments tlen;
 * presumably the code substituted at the marker does - confirm.
 */
395 void AddCh() {
396 if (tlen >= tval.length) {
397 char[] newBuf = new char[2 * tval.length];
398 System.arraycopy(tval, 0, newBuf, 0, tval.length);
399 tval = newBuf;
400 }
401 if (ch != Buffer.EOF) {
402 -->casing2
403 NextCh();
404 }
405 }
406
407 -->comments
408
/**
 * Looks up the text of the current token in the literal map and, if an entry exists,
 * replaces the token kind with the kind stored there.
 */
409 void CheckLiteral() {
410 String val = t.val;
411 -->casing3
412 Object kind = literals.get(val);
413 if (kind != null) {
414 t.kind = ((Integer) kind).intValue();
415 }
416 }
417
/**
 * Scans the next token from the input. Skips blanks (plus whatever the first scan
 * marker adds to the condition), creates a new token recording the current pos, col,
 * line and charPos, looks up the start state for the current character and runs the
 * state loop. State -1 yields an end-of-file token. State 0 ends the token with the
 * last recognized kind; if a kind had been recognized, the scanner is first moved back
 * to just behind the recognized text. The token value is taken from the first tlen
 * characters of tval.
 */
418 Token NextToken() {
419 while (ch == ' ' ||
420 -->scan1
421 ) NextCh();
422 -->scan2
423 int recKind = noSym;
424 int recEnd = pos;
425 t = new Token();
426 t.pos = pos;
427 t.col = col;
428 t.line = line;
429 t.charPos = charPos;
430 int state = start.state(ch);
431 tlen = 0;
432 AddCh();
433
434 loop: for (;;) {
435 switch (state) {
436 case -1: {
437 t.kind = eofSym;
438 break loop;
439 } // NextCh already done
440 case 0: {
441 if (recKind != noSym) {
442 tlen = recEnd - t.pos;
443 SetScannerBehindT();
444 }
445 t.kind = recKind;
446 break loop;
447 } // NextCh already done
448 -->scan3
449 }
450 }
451 t.val = new String(tval, 0, tlen);
452 return t;
453 }
454
/**
 * Repositions the scanner just behind the current token: moves the buffer to the token
 * start, reads one character, restores line, col and charPos from the token and then
 * advances tlen characters.
 */
455 private void SetScannerBehindT() {
456 buffer.setPos(t.pos);
457 NextCh();
458 line = t.line;
459 col = t.col;
460 charPos = t.charPos;
461 for (int i = 0; i < tlen; i++)
462 NextCh();
463 }
464
465 // get the next token (possibly a token already seen during peeking)
// If no peeked token is pending, a freshly scanned token is returned; otherwise the
// list head and the peek position both advance to the next pending token.
466 public Token Scan() {
467 if (tokens.next == null) {
468 return NextToken();
469 } else {
470 pt = tokens = tokens.next;
471 return tokens;
472 }
473 }
474
475 // get the next token, ignore pragmas
// Tokens scanned while peeking are appended to the list behind pt; tokens whose kind is
// greater than maxT are skipped.
476 public Token Peek() {
477 do {
478 if (pt.next == null) {
479 pt.next = NextToken();
480 }
481 pt = pt.next;
482 } while (pt.kind > maxT); // skip pragmas
483
484 return pt;
485 }
486
487 // make sure that peeking starts at current scan position
488 public void ResetPeek() {
489 pt = tokens;
490 }
491
492 // The following methods are used for the CLNG Editor and will be called with java.Reflection.
493 // If the editor won't be used these 3 functions are obsolete,
494 // otherwise changes within the signature of the methods will result in Syntax Highlighting not working properly
495 // anymore.
496
497 // get the offset of the next Token
// Returns the byte position (pos) of the current peek token.
498 public int getPeekTokenOffset() {
499 return pt.pos;
500 }
501
502 // get the String value of the Token
// Returns the value (val) of the current peek token.
503 public String getPeekTokenVal() {
504 return pt.val;
505 }
506
507 // get the Kind value of the Token
// Returns the kind of the current peek token.
508 public int getPeekTokenKind() {
509 return pt.kind;
510 }
511
512 } // end Scanner