comparison graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.java @ 7292:213c1297a814

Simple Language: A simple dynamic programming language to demonstrate Truffle features
author Christian Wimmer <christian.wimmer@oracle.com>
date Fri, 21 Dec 2012 10:45:37 -0800
parents
children b6743d7eb8d4
comparison
equal deleted inserted replaced
7291:a748e4d44694 7292:213c1297a814
1 /*
2 * Copyright (c) 2012, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 // The content of this file is automatically generated. DO NOT EDIT.
25
26 package com.oracle.truffle.sl.parser;
27
28 import java.io.InputStream;
29 import java.io.IOException;
30 import java.io.RandomAccessFile;
31 import java.util.Map;
32 import java.util.HashMap;
33
34 // Checkstyle: stop
35 class Token {
36
37 public int kind; // token kind
38 public int pos; // token position in bytes in the source text (starting at 0)
39 public int charPos; // token position in characters in the source text (starting at 0)
40 public int col; // token column (starting at 1)
41 public int line; // token line (starting at 1)
42 public String val; // token value
43 public Token next; // ML 2005-03-11 Peek tokens are kept in linked list
44 }
45
46 // -----------------------------------------------------------------------------------
47 // Buffer
48 // -----------------------------------------------------------------------------------
49 class Buffer {
50
51 // This Buffer supports the following cases:
52 // 1) seekable stream (file)
53 // a) whole stream in buffer
54 // b) part of stream in buffer
55 // 2) non seekable stream (network, console)
56
57 public static final int EOF = Character.MAX_VALUE + 1;
58 private static final int MIN_BUFFER_LENGTH = 1024; // 1KB
59 private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
60 private byte[] buf; // input buffer
61 private int bufStart; // position of first byte in buffer relative to input stream
62 private int bufLen; // length of buffer
63 private int fileLen; // length of input stream (may change if stream is no file)
64 private int bufPos; // current position in buffer
65 private RandomAccessFile file; // input stream (seekable)
66 private InputStream stream; // growing input stream (e.g.: console, network)
67
68 public Buffer(InputStream s) {
69 stream = s;
70 fileLen = bufLen = bufStart = bufPos = 0;
71 buf = new byte[MIN_BUFFER_LENGTH];
72 }
73
74 public Buffer(String fileName) {
75 try {
76 file = new RandomAccessFile(fileName, "r");
77 fileLen = (int) file.length();
78 bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH);
79 buf = new byte[bufLen];
80 bufStart = Integer.MAX_VALUE; // nothing in buffer so far
81 if (fileLen > 0)
82 setPos(0); // setup buffer to position 0 (start)
83 else
84 bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid
85 if (bufLen == fileLen)
86 Close();
87 } catch (IOException e) {
88 throw new FatalError("Could not open file " + fileName);
89 }
90 }
91
92 // don't use b after this call anymore
93 // called in UTF8Buffer constructor
94 protected Buffer(Buffer b) {
95 buf = b.buf;
96 bufStart = b.bufStart;
97 bufLen = b.bufLen;
98 fileLen = b.fileLen;
99 bufPos = b.bufPos;
100 file = b.file;
101 stream = b.stream;
102 // keep finalize from closing the file
103 b.file = null;
104 }
105
106 @Override
107 protected void finalize() throws Throwable {
108 super.finalize();
109 Close();
110 }
111
112 protected void Close() {
113 if (file != null) {
114 try {
115 file.close();
116 file = null;
117 } catch (IOException e) {
118 throw new FatalError(e.getMessage());
119 }
120 }
121 }
122
123 public int Read() {
124 if (bufPos < bufLen) {
125 return buf[bufPos++] & 0xff; // mask out sign bits
126 } else if (getPos() < fileLen) {
127 setPos(getPos()); // shift buffer start to pos
128 return buf[bufPos++] & 0xff; // mask out sign bits
129 } else if (stream != null && ReadNextStreamChunk() > 0) {
130 return buf[bufPos++] & 0xff; // mask out sign bits
131 } else {
132 return EOF;
133 }
134 }
135
136 public int Peek() {
137 int curPos = getPos();
138 int ch = Read();
139 setPos(curPos);
140 return ch;
141 }
142
143 // beg .. begin, zero-based, inclusive, in byte
144 // end .. end, zero-based, exclusive, in byte
145 public String GetString(int beg, int end) {
146 int len = 0;
147 char[] buffer = new char[end - beg];
148 int oldPos = getPos();
149 setPos(beg);
150 while (getPos() < end)
151 buffer[len++] = (char) Read();
152 setPos(oldPos);
153 return new String(buffer, 0, len);
154 }
155
156 public int getPos() {
157 return bufPos + bufStart;
158 }
159
160 public void setPos(int value) {
161 if (value >= fileLen && stream != null) {
162 // Wanted position is after buffer and the stream
163 // is not seek-able e.g. network or console,
164 // thus we have to read the stream manually till
165 // the wanted position is in sight.
166 while (value >= fileLen && ReadNextStreamChunk() > 0) {
167 // nothing to do...
168 }
169 }
170
171 if (value < 0 || value > fileLen) {
172 throw new FatalError("buffer out of bounds access, position: " + value);
173 }
174
175 if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
176 bufPos = value - bufStart;
177 } else if (file != null) { // must be swapped in
178 try {
179 file.seek(value);
180 bufLen = file.read(buf);
181 bufStart = value;
182 bufPos = 0;
183 } catch (IOException e) {
184 throw new FatalError(e.getMessage());
185 }
186 } else {
187 // set the position to the end of the file, Pos will return fileLen.
188 bufPos = fileLen - bufStart;
189 }
190 }
191
192 // Read the next chunk of bytes from the stream, increases the buffer
193 // if needed and updates the fields fileLen and bufLen.
194 // Returns the number of bytes read.
195 private int ReadNextStreamChunk() {
196 int free = buf.length - bufLen;
197 if (free == 0) {
198 // in the case of a growing input stream
199 // we can neither seek in the stream, nor can we
200 // foresee the maximum length, thus we must adapt
201 // the buffer size on demand.
202 byte[] newBuf = new byte[bufLen * 2];
203 System.arraycopy(buf, 0, newBuf, 0, bufLen);
204 buf = newBuf;
205 free = bufLen;
206 }
207
208 int read;
209 try {
210 read = stream.read(buf, bufLen, free);
211 } catch (IOException ioex) {
212 throw new FatalError(ioex.getMessage());
213 }
214
215 if (read > 0) {
216 fileLen = bufLen = (bufLen + read);
217 return read;
218 }
219 // end of stream reached
220 return 0;
221 }
222 }
223
224 // -----------------------------------------------------------------------------------
225 // UTF8Buffer
226 // -----------------------------------------------------------------------------------
227 class UTF8Buffer extends Buffer {
228
229 UTF8Buffer(Buffer b) {
230 super(b);
231 }
232
233 @Override
234 public int Read() {
235 int ch;
236 do {
237 ch = super.Read();
238 // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
239 } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF));
240 if (ch < 128 || ch == EOF) {
241 // nothing to do, first 127 chars are the same in ascii and utf8
242 // 0xxxxxxx or end of file character
243 } else if ((ch & 0xF0) == 0xF0) {
244 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
245 int c1 = ch & 0x07;
246 ch = super.Read();
247 int c2 = ch & 0x3F;
248 ch = super.Read();
249 int c3 = ch & 0x3F;
250 ch = super.Read();
251 int c4 = ch & 0x3F;
252 ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4;
253 } else if ((ch & 0xE0) == 0xE0) {
254 // 1110xxxx 10xxxxxx 10xxxxxx
255 int c1 = ch & 0x0F;
256 ch = super.Read();
257 int c2 = ch & 0x3F;
258 ch = super.Read();
259 int c3 = ch & 0x3F;
260 ch = (((c1 << 6) | c2) << 6) | c3;
261 } else if ((ch & 0xC0) == 0xC0) {
262 // 110xxxxx 10xxxxxx
263 int c1 = ch & 0x1F;
264 ch = super.Read();
265 int c2 = ch & 0x3F;
266 ch = (c1 << 6) | c2;
267 }
268 return ch;
269 }
270 }
271
272 // -----------------------------------------------------------------------------------
273 // StartStates -- maps characters to start states of tokens
274 // -----------------------------------------------------------------------------------
/**
 * Maps characters to start states of tokens.
 *
 * Implemented as a small chained hash table with a fixed number of buckets. Keys that were
 * never registered map to state 0.
 */
class StartStates {

    /** Number of buckets; must stay a power of two because buckets are selected by masking. */
    private static final int BUCKET_COUNT = 128;

    /** One entry of a bucket's singly linked collision chain. */
    private static final class Elem {

        final int key;
        final int val;
        final Elem next;

        Elem(int key, int val, Elem next) {
            this.key = key;
            this.val = val;
            this.next = next;
        }
    }

    private final Elem[] tab = new Elem[BUCKET_COUNT];

    /**
     * Selects the bucket for a key. For non-negative keys this is the same bucket as
     * {@code key % 128}; unlike the remainder it never yields a negative index, so negative
     * keys are handled as well instead of causing an ArrayIndexOutOfBoundsException.
     */
    private static int bucketOf(int key) {
        return key & (BUCKET_COUNT - 1);
    }

    /**
     * Registers the start state for a key. A later registration of the same key takes
     * precedence over earlier ones because new entries are put at the head of the chain.
     *
     * @param key the character (or other input value) a token may start with
     * @param val the start state for that key
     */
    public void set(int key, int val) {
        int k = bucketOf(key);
        tab[k] = new Elem(key, val, tab[k]);
    }

    /**
     * Looks up the start state for a key.
     *
     * @param key the character (or other input value) to look up
     * @return the registered state, or 0 if the key was never registered
     */
    public int state(int key) {
        for (Elem e = tab[bucketOf(key)]; e != null; e = e.next) {
            if (e.key == key) {
                return e.val;
            }
        }
        return 0;
    }
}
304
305 // -----------------------------------------------------------------------------------
306 // Scanner
307 // -----------------------------------------------------------------------------------
308 @SuppressWarnings({"rawtypes", "unchecked"})
309 public class Scanner {
310
311 static final char EOL = '\n';
312 static final int eofSym = 0;
313 static final int maxT = 25;
314 static final int noSym = 25;
315
316
317 public Buffer buffer; // scanner buffer
318
319 Token t; // current token
320 int ch; // current input character
321 int pos; // byte position of current character
322 int charPos; // position by unicode characters starting with 0
323 int col; // column number of current character
324 int line; // line number of current character
325 int oldEols; // EOLs that appeared in a comment;
326 static final StartStates start; // maps initial token character to start state
327 static final Map literals; // maps literal strings to literal kinds
328
329 Token tokens; // list of tokens already peeked (first token is a dummy)
330 Token pt; // current peek token
331
332 char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged
333 int tlen; // length of current token
334
335 static {
336 start = new StartStates();
337 literals = new HashMap();
338 for (int i = 65; i <= 90; ++i) start.set(i, 1);
339 for (int i = 97; i <= 122; ++i) start.set(i, 1);
340 for (int i = 49; i <= 57; ++i) start.set(i, 4);
341 start.set(34, 2);
342 start.set(48, 5);
343 start.set(123, 6);
344 start.set(125, 7);
345 start.set(40, 8);
346 start.set(41, 9);
347 start.set(61, 20);
348 start.set(59, 10);
349 start.set(60, 21);
350 start.set(62, 22);
351 start.set(33, 14);
352 start.set(43, 16);
353 start.set(45, 17);
354 start.set(42, 18);
355 start.set(47, 19);
356 start.set(Buffer.EOF, -1);
357 literals.put("function", new Integer(4));
358 literals.put("while", new Integer(7));
359 literals.put("print", new Integer(12));
360 literals.put("return", new Integer(13));
361 literals.put("time", new Integer(24));
362
363 }
364
365 public Scanner(String fileName) {
366 buffer = new Buffer(fileName);
367 Init();
368 }
369
370 public Scanner(InputStream s) {
371 buffer = new Buffer(s);
372 Init();
373 }
374
375 void Init() {
376 pos = -1;
377 line = 1;
378 col = 0;
379 charPos = -1;
380 oldEols = 0;
381 NextCh();
382 if (ch == 0xEF) { // check optional byte order mark for UTF-8
383 NextCh();
384 int ch1 = ch;
385 NextCh();
386 int ch2 = ch;
387 if (ch1 != 0xBB || ch2 != 0xBF) {
388 throw new FatalError("Illegal byte order mark at start of file");
389 }
390 buffer = new UTF8Buffer(buffer);
391 col = 0;
392 charPos = -1;
393 NextCh();
394 }
395 pt = tokens = new Token(); // first token is a dummy
396 }
397
398 void NextCh() {
399 if (oldEols > 0) {
400 ch = EOL;
401 oldEols--;
402 } else {
403 pos = buffer.getPos();
404 // buffer reads unicode chars, if UTF8 has been detected
405 ch = buffer.Read();
406 col++;
407 charPos++;
408 // replace isolated '\r' by '\n' in order to make
409 // eol handling uniform across Windows, Unix and Mac
410 if (ch == '\r' && buffer.Peek() != '\n')
411 ch = EOL;
412 if (ch == EOL) {
413 line++;
414 col = 0;
415 }
416 }
417
418 }
419
420 void AddCh() {
421 if (tlen >= tval.length) {
422 char[] newBuf = new char[2 * tval.length];
423 System.arraycopy(tval, 0, newBuf, 0, tval.length);
424 tval = newBuf;
425 }
426 if (ch != Buffer.EOF) {
427 tval[tlen++] = (char)ch;
428
429 NextCh();
430 }
431 }
432
433
434 boolean Comment0() {
435 int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
436 NextCh();
437 if (ch == '/') {
438 NextCh();
439 for(;;) {
440 if (ch == 10) {
441 level--;
442 if (level == 0) { oldEols = line - line0; NextCh(); return true; }
443 NextCh();
444 } else if (ch == Buffer.EOF) return false;
445 else NextCh();
446 }
447 } else {
448 buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
449 }
450 return false;
451 }
452
453 boolean Comment1() {
454 int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;
455 NextCh();
456 if (ch == '*') {
457 NextCh();
458 for(;;) {
459 if (ch == '*') {
460 NextCh();
461 if (ch == '/') {
462 level--;
463 if (level == 0) { oldEols = line - line0; NextCh(); return true; }
464 NextCh();
465 }
466 } else if (ch == Buffer.EOF) return false;
467 else NextCh();
468 }
469 } else {
470 buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;
471 }
472 return false;
473 }
474
475
476 void CheckLiteral() {
477 String val = t.val;
478
479 Object kind = literals.get(val);
480 if (kind != null) {
481 t.kind = ((Integer) kind).intValue();
482 }
483 }
484
485 Token NextToken() {
486 while (ch == ' ' ||
487 ch >= 9 && ch <= 10 || ch == 13
488 ) NextCh();
489 if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken();
490 int recKind = noSym;
491 int recEnd = pos;
492 t = new Token();
493 t.pos = pos;
494 t.col = col;
495 t.line = line;
496 t.charPos = charPos;
497 int state = start.state(ch);
498 tlen = 0;
499 AddCh();
500
501 loop: for (;;) {
502 switch (state) {
503 case -1: {
504 t.kind = eofSym;
505 break loop;
506 } // NextCh already done
507 case 0: {
508 if (recKind != noSym) {
509 tlen = recEnd - t.pos;
510 SetScannerBehindT();
511 }
512 t.kind = recKind;
513 break loop;
514 } // NextCh already done
515 case 1:
516 recEnd = pos; recKind = 1;
517 if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {AddCh(); state = 1; break;}
518 else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}
519 case 2:
520 if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 2; break;}
521 else if (ch == '"') {AddCh(); state = 3; break;}
522 else {state = 0; break;}
523 case 3:
524 {t.kind = 2; break loop;}
525 case 4:
526 recEnd = pos; recKind = 3;
527 if (ch >= '0' && ch <= '9') {AddCh(); state = 4; break;}
528 else {t.kind = 3; break loop;}
529 case 5:
530 {t.kind = 3; break loop;}
531 case 6:
532 {t.kind = 5; break loop;}
533 case 7:
534 {t.kind = 6; break loop;}
535 case 8:
536 {t.kind = 8; break loop;}
537 case 9:
538 {t.kind = 9; break loop;}
539 case 10:
540 {t.kind = 11; break loop;}
541 case 11:
542 {t.kind = 16; break loop;}
543 case 12:
544 {t.kind = 17; break loop;}
545 case 13:
546 {t.kind = 18; break loop;}
547 case 14:
548 if (ch == '=') {AddCh(); state = 15; break;}
549 else {state = 0; break;}
550 case 15:
551 {t.kind = 19; break loop;}
552 case 16:
553 {t.kind = 20; break loop;}
554 case 17:
555 {t.kind = 21; break loop;}
556 case 18:
557 {t.kind = 22; break loop;}
558 case 19:
559 {t.kind = 23; break loop;}
560 case 20:
561 recEnd = pos; recKind = 10;
562 if (ch == '=') {AddCh(); state = 13; break;}
563 else {t.kind = 10; break loop;}
564 case 21:
565 recEnd = pos; recKind = 14;
566 if (ch == '=') {AddCh(); state = 11; break;}
567 else {t.kind = 14; break loop;}
568 case 22:
569 recEnd = pos; recKind = 15;
570 if (ch == '=') {AddCh(); state = 12; break;}
571 else {t.kind = 15; break loop;}
572
573 }
574 }
575 t.val = new String(tval, 0, tlen);
576 return t;
577 }
578
579 private void SetScannerBehindT() {
580 buffer.setPos(t.pos);
581 NextCh();
582 line = t.line;
583 col = t.col;
584 charPos = t.charPos;
585 for (int i = 0; i < tlen; i++)
586 NextCh();
587 }
588
589 // get the next token (possibly a token already seen during peeking)
590 public Token Scan() {
591 if (tokens.next == null) {
592 return NextToken();
593 } else {
594 pt = tokens = tokens.next;
595 return tokens;
596 }
597 }
598
599 // get the next token, ignore pragmas
600 public Token Peek() {
601 do {
602 if (pt.next == null) {
603 pt.next = NextToken();
604 }
605 pt = pt.next;
606 } while (pt.kind > maxT); // skip pragmas
607
608 return pt;
609 }
610
611 // make sure that peeking starts at current scan position
612 public void ResetPeek() {
613 pt = tokens;
614 }
615
616 // The following methods are used for the CLNG Editor and will be called with java.Reflection.
617 // If the editor won't be used these 3 functions are obsolete,
618 // otherwise changes within the signature of the methods will result in Syntax Highlighting not working properly
619 // anymore.
620
621 // get the offset of the next Token
622 public int getPeekTokenOffset() {
623 return pt.pos;
624 }
625
626 // get the String value of the Token
627 public String getPeekTokenVal() {
628 return pt.val;
629 }
630
631 // get the Kind value of the Token
632 public int getPeekTokenKind() {
633 return pt.kind;
634 }
635
636 } // end Scanner