Mercurial > hg > graal-compiler
comparison graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.java @ 7292:213c1297a814
Simple Language: A simple dynamic programming language to demonstrate Truffle features
author | Christian Wimmer <christian.wimmer@oracle.com> |
---|---|
date | Fri, 21 Dec 2012 10:45:37 -0800 |
parents | |
children | b6743d7eb8d4 |
comparison
equal
deleted
inserted
replaced
7291:a748e4d44694 | 7292:213c1297a814 |
---|---|
1 /* | |
2 * Copyright (c) 2012, 2012, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 */ | |
23 | |
24 // The content of this file is automatically generated. DO NOT EDIT. | |
25 | |
26 package com.oracle.truffle.sl.parser; | |
27 | |
28 import java.io.InputStream; | |
29 import java.io.IOException; | |
30 import java.io.RandomAccessFile; | |
31 import java.util.Map; | |
32 import java.util.HashMap; | |
33 | |
// Checkstyle: stop
/**
 * A single token produced by the scanner.
 *
 * Plain mutable data holder: every field is public and is filled in by the scanner
 * while the token is being recognized.
 */
class Token {

    /** Token kind. */
    public int kind;

    /** Token position in bytes in the source text (starting at 0). */
    public int pos;

    /** Token position in characters in the source text (starting at 0). */
    public int charPos;

    /** Token column (starting at 1). */
    public int col;

    /** Token line (starting at 1). */
    public int line;

    /** Token value. */
    public String val;

    /** ML 2005-03-11 Peek tokens are kept in linked list. */
    public Token next;
}
45 | |
46 // ----------------------------------------------------------------------------------- | |
47 // Buffer | |
48 // ----------------------------------------------------------------------------------- | |
49 class Buffer { | |
50 | |
51 // This Buffer supports the following cases: | |
52 // 1) seekable stream (file) | |
53 // a) whole stream in buffer | |
54 // b) part of stream in buffer | |
55 // 2) non seekable stream (network, console) | |
56 | |
57 public static final int EOF = Character.MAX_VALUE + 1; | |
58 private static final int MIN_BUFFER_LENGTH = 1024; // 1KB | |
59 private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB | |
60 private byte[] buf; // input buffer | |
61 private int bufStart; // position of first byte in buffer relative to input stream | |
62 private int bufLen; // length of buffer | |
63 private int fileLen; // length of input stream (may change if stream is no file) | |
64 private int bufPos; // current position in buffer | |
65 private RandomAccessFile file; // input stream (seekable) | |
66 private InputStream stream; // growing input stream (e.g.: console, network) | |
67 | |
68 public Buffer(InputStream s) { | |
69 stream = s; | |
70 fileLen = bufLen = bufStart = bufPos = 0; | |
71 buf = new byte[MIN_BUFFER_LENGTH]; | |
72 } | |
73 | |
74 public Buffer(String fileName) { | |
75 try { | |
76 file = new RandomAccessFile(fileName, "r"); | |
77 fileLen = (int) file.length(); | |
78 bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH); | |
79 buf = new byte[bufLen]; | |
80 bufStart = Integer.MAX_VALUE; // nothing in buffer so far | |
81 if (fileLen > 0) | |
82 setPos(0); // setup buffer to position 0 (start) | |
83 else | |
84 bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid | |
85 if (bufLen == fileLen) | |
86 Close(); | |
87 } catch (IOException e) { | |
88 throw new FatalError("Could not open file " + fileName); | |
89 } | |
90 } | |
91 | |
92 // don't use b after this call anymore | |
93 // called in UTF8Buffer constructor | |
94 protected Buffer(Buffer b) { | |
95 buf = b.buf; | |
96 bufStart = b.bufStart; | |
97 bufLen = b.bufLen; | |
98 fileLen = b.fileLen; | |
99 bufPos = b.bufPos; | |
100 file = b.file; | |
101 stream = b.stream; | |
102 // keep finalize from closing the file | |
103 b.file = null; | |
104 } | |
105 | |
106 @Override | |
107 protected void finalize() throws Throwable { | |
108 super.finalize(); | |
109 Close(); | |
110 } | |
111 | |
112 protected void Close() { | |
113 if (file != null) { | |
114 try { | |
115 file.close(); | |
116 file = null; | |
117 } catch (IOException e) { | |
118 throw new FatalError(e.getMessage()); | |
119 } | |
120 } | |
121 } | |
122 | |
123 public int Read() { | |
124 if (bufPos < bufLen) { | |
125 return buf[bufPos++] & 0xff; // mask out sign bits | |
126 } else if (getPos() < fileLen) { | |
127 setPos(getPos()); // shift buffer start to pos | |
128 return buf[bufPos++] & 0xff; // mask out sign bits | |
129 } else if (stream != null && ReadNextStreamChunk() > 0) { | |
130 return buf[bufPos++] & 0xff; // mask out sign bits | |
131 } else { | |
132 return EOF; | |
133 } | |
134 } | |
135 | |
136 public int Peek() { | |
137 int curPos = getPos(); | |
138 int ch = Read(); | |
139 setPos(curPos); | |
140 return ch; | |
141 } | |
142 | |
143 // beg .. begin, zero-based, inclusive, in byte | |
144 // end .. end, zero-based, exclusive, in byte | |
145 public String GetString(int beg, int end) { | |
146 int len = 0; | |
147 char[] buffer = new char[end - beg]; | |
148 int oldPos = getPos(); | |
149 setPos(beg); | |
150 while (getPos() < end) | |
151 buffer[len++] = (char) Read(); | |
152 setPos(oldPos); | |
153 return new String(buffer, 0, len); | |
154 } | |
155 | |
156 public int getPos() { | |
157 return bufPos + bufStart; | |
158 } | |
159 | |
160 public void setPos(int value) { | |
161 if (value >= fileLen && stream != null) { | |
162 // Wanted position is after buffer and the stream | |
163 // is not seek-able e.g. network or console, | |
164 // thus we have to read the stream manually till | |
165 // the wanted position is in sight. | |
166 while (value >= fileLen && ReadNextStreamChunk() > 0) { | |
167 // nothing to do... | |
168 } | |
169 } | |
170 | |
171 if (value < 0 || value > fileLen) { | |
172 throw new FatalError("buffer out of bounds access, position: " + value); | |
173 } | |
174 | |
175 if (value >= bufStart && value < bufStart + bufLen) { // already in buffer | |
176 bufPos = value - bufStart; | |
177 } else if (file != null) { // must be swapped in | |
178 try { | |
179 file.seek(value); | |
180 bufLen = file.read(buf); | |
181 bufStart = value; | |
182 bufPos = 0; | |
183 } catch (IOException e) { | |
184 throw new FatalError(e.getMessage()); | |
185 } | |
186 } else { | |
187 // set the position to the end of the file, Pos will return fileLen. | |
188 bufPos = fileLen - bufStart; | |
189 } | |
190 } | |
191 | |
192 // Read the next chunk of bytes from the stream, increases the buffer | |
193 // if needed and updates the fields fileLen and bufLen. | |
194 // Returns the number of bytes read. | |
195 private int ReadNextStreamChunk() { | |
196 int free = buf.length - bufLen; | |
197 if (free == 0) { | |
198 // in the case of a growing input stream | |
199 // we can neither seek in the stream, nor can we | |
200 // foresee the maximum length, thus we must adapt | |
201 // the buffer size on demand. | |
202 byte[] newBuf = new byte[bufLen * 2]; | |
203 System.arraycopy(buf, 0, newBuf, 0, bufLen); | |
204 buf = newBuf; | |
205 free = bufLen; | |
206 } | |
207 | |
208 int read; | |
209 try { | |
210 read = stream.read(buf, bufLen, free); | |
211 } catch (IOException ioex) { | |
212 throw new FatalError(ioex.getMessage()); | |
213 } | |
214 | |
215 if (read > 0) { | |
216 fileLen = bufLen = (bufLen + read); | |
217 return read; | |
218 } | |
219 // end of stream reached | |
220 return 0; | |
221 } | |
222 } | |
223 | |
224 // ----------------------------------------------------------------------------------- | |
225 // UTF8Buffer | |
226 // ----------------------------------------------------------------------------------- | |
227 class UTF8Buffer extends Buffer { | |
228 | |
229 UTF8Buffer(Buffer b) { | |
230 super(b); | |
231 } | |
232 | |
233 @Override | |
234 public int Read() { | |
235 int ch; | |
236 do { | |
237 ch = super.Read(); | |
238 // until we find a utf8 start (0xxxxxxx or 11xxxxxx) | |
239 } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); | |
240 if (ch < 128 || ch == EOF) { | |
241 // nothing to do, first 127 chars are the same in ascii and utf8 | |
242 // 0xxxxxxx or end of file character | |
243 } else if ((ch & 0xF0) == 0xF0) { | |
244 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
245 int c1 = ch & 0x07; | |
246 ch = super.Read(); | |
247 int c2 = ch & 0x3F; | |
248 ch = super.Read(); | |
249 int c3 = ch & 0x3F; | |
250 ch = super.Read(); | |
251 int c4 = ch & 0x3F; | |
252 ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; | |
253 } else if ((ch & 0xE0) == 0xE0) { | |
254 // 1110xxxx 10xxxxxx 10xxxxxx | |
255 int c1 = ch & 0x0F; | |
256 ch = super.Read(); | |
257 int c2 = ch & 0x3F; | |
258 ch = super.Read(); | |
259 int c3 = ch & 0x3F; | |
260 ch = (((c1 << 6) | c2) << 6) | c3; | |
261 } else if ((ch & 0xC0) == 0xC0) { | |
262 // 110xxxxx 10xxxxxx | |
263 int c1 = ch & 0x1F; | |
264 ch = super.Read(); | |
265 int c2 = ch & 0x3F; | |
266 ch = (c1 << 6) | c2; | |
267 } | |
268 return ch; | |
269 } | |
270 } | |
271 | |
// -----------------------------------------------------------------------------------
// StartStates -- maps characters to start states of tokens
// -----------------------------------------------------------------------------------
/**
 * Small chained hash table from an int key (a character value) to an int state.
 *
 * Keys that were never set map to state 0. Setting the same key again shadows the
 * earlier entry, because new entries are inserted at the head of their chain.
 */
class StartStates {

    private static final int TABLE_SIZE = 128;
    // TABLE_SIZE is a power of two, so masking equals "key % TABLE_SIZE" for every
    // non-negative key and, unlike %, still yields a valid index for negative keys.
    private static final int INDEX_MASK = TABLE_SIZE - 1;

    /** One entry of a bucket's singly linked chain. */
    private static class Elem {

        public int key, val;
        public Elem next;

        public Elem(int key, int val) {
            this.key = key;
            this.val = val;
        }
    }

    private Elem[] tab = new Elem[TABLE_SIZE];

    /** Associates {@code key} with state {@code val}. */
    public void set(int key, int val) {
        Elem e = new Elem(key, val);
        int k = key & INDEX_MASK;
        e.next = tab[k];
        tab[k] = e;
    }

    /** Returns the state most recently set for {@code key}, or 0 if none was set. */
    public int state(int key) {
        Elem e = tab[key & INDEX_MASK];
        while (e != null && e.key != key) {
            e = e.next;
        }
        return e == null ? 0 : e.val;
    }
}
304 | |
305 // ----------------------------------------------------------------------------------- | |
306 // Scanner | |
307 // ----------------------------------------------------------------------------------- | |
308 @SuppressWarnings({"rawtypes", "unchecked"}) | |
309 public class Scanner { | |
310 | |
311 static final char EOL = '\n'; | |
312 static final int eofSym = 0; | |
313 static final int maxT = 25; | |
314 static final int noSym = 25; | |
315 | |
316 | |
317 public Buffer buffer; // scanner buffer | |
318 | |
319 Token t; // current token | |
320 int ch; // current input character | |
321 int pos; // byte position of current character | |
322 int charPos; // position by unicode characters starting with 0 | |
323 int col; // column number of current character | |
324 int line; // line number of current character | |
325 int oldEols; // EOLs that appeared in a comment; | |
326 static final StartStates start; // maps initial token character to start state | |
327 static final Map literals; // maps literal strings to literal kinds | |
328 | |
329 Token tokens; // list of tokens already peeked (first token is a dummy) | |
330 Token pt; // current peek token | |
331 | |
332 char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged | |
333 int tlen; // length of current token | |
334 | |
335 static { | |
336 start = new StartStates(); | |
337 literals = new HashMap(); | |
338 for (int i = 65; i <= 90; ++i) start.set(i, 1); | |
339 for (int i = 97; i <= 122; ++i) start.set(i, 1); | |
340 for (int i = 49; i <= 57; ++i) start.set(i, 4); | |
341 start.set(34, 2); | |
342 start.set(48, 5); | |
343 start.set(123, 6); | |
344 start.set(125, 7); | |
345 start.set(40, 8); | |
346 start.set(41, 9); | |
347 start.set(61, 20); | |
348 start.set(59, 10); | |
349 start.set(60, 21); | |
350 start.set(62, 22); | |
351 start.set(33, 14); | |
352 start.set(43, 16); | |
353 start.set(45, 17); | |
354 start.set(42, 18); | |
355 start.set(47, 19); | |
356 start.set(Buffer.EOF, -1); | |
357 literals.put("function", new Integer(4)); | |
358 literals.put("while", new Integer(7)); | |
359 literals.put("print", new Integer(12)); | |
360 literals.put("return", new Integer(13)); | |
361 literals.put("time", new Integer(24)); | |
362 | |
363 } | |
364 | |
365 public Scanner(String fileName) { | |
366 buffer = new Buffer(fileName); | |
367 Init(); | |
368 } | |
369 | |
370 public Scanner(InputStream s) { | |
371 buffer = new Buffer(s); | |
372 Init(); | |
373 } | |
374 | |
375 void Init() { | |
376 pos = -1; | |
377 line = 1; | |
378 col = 0; | |
379 charPos = -1; | |
380 oldEols = 0; | |
381 NextCh(); | |
382 if (ch == 0xEF) { // check optional byte order mark for UTF-8 | |
383 NextCh(); | |
384 int ch1 = ch; | |
385 NextCh(); | |
386 int ch2 = ch; | |
387 if (ch1 != 0xBB || ch2 != 0xBF) { | |
388 throw new FatalError("Illegal byte order mark at start of file"); | |
389 } | |
390 buffer = new UTF8Buffer(buffer); | |
391 col = 0; | |
392 charPos = -1; | |
393 NextCh(); | |
394 } | |
395 pt = tokens = new Token(); // first token is a dummy | |
396 } | |
397 | |
398 void NextCh() { | |
399 if (oldEols > 0) { | |
400 ch = EOL; | |
401 oldEols--; | |
402 } else { | |
403 pos = buffer.getPos(); | |
404 // buffer reads unicode chars, if UTF8 has been detected | |
405 ch = buffer.Read(); | |
406 col++; | |
407 charPos++; | |
408 // replace isolated '\r' by '\n' in order to make | |
409 // eol handling uniform across Windows, Unix and Mac | |
410 if (ch == '\r' && buffer.Peek() != '\n') | |
411 ch = EOL; | |
412 if (ch == EOL) { | |
413 line++; | |
414 col = 0; | |
415 } | |
416 } | |
417 | |
418 } | |
419 | |
420 void AddCh() { | |
421 if (tlen >= tval.length) { | |
422 char[] newBuf = new char[2 * tval.length]; | |
423 System.arraycopy(tval, 0, newBuf, 0, tval.length); | |
424 tval = newBuf; | |
425 } | |
426 if (ch != Buffer.EOF) { | |
427 tval[tlen++] = (char)ch; | |
428 | |
429 NextCh(); | |
430 } | |
431 } | |
432 | |
433 | |
434 boolean Comment0() { | |
435 int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; | |
436 NextCh(); | |
437 if (ch == '/') { | |
438 NextCh(); | |
439 for(;;) { | |
440 if (ch == 10) { | |
441 level--; | |
442 if (level == 0) { oldEols = line - line0; NextCh(); return true; } | |
443 NextCh(); | |
444 } else if (ch == Buffer.EOF) return false; | |
445 else NextCh(); | |
446 } | |
447 } else { | |
448 buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; | |
449 } | |
450 return false; | |
451 } | |
452 | |
453 boolean Comment1() { | |
454 int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; | |
455 NextCh(); | |
456 if (ch == '*') { | |
457 NextCh(); | |
458 for(;;) { | |
459 if (ch == '*') { | |
460 NextCh(); | |
461 if (ch == '/') { | |
462 level--; | |
463 if (level == 0) { oldEols = line - line0; NextCh(); return true; } | |
464 NextCh(); | |
465 } | |
466 } else if (ch == Buffer.EOF) return false; | |
467 else NextCh(); | |
468 } | |
469 } else { | |
470 buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; | |
471 } | |
472 return false; | |
473 } | |
474 | |
475 | |
476 void CheckLiteral() { | |
477 String val = t.val; | |
478 | |
479 Object kind = literals.get(val); | |
480 if (kind != null) { | |
481 t.kind = ((Integer) kind).intValue(); | |
482 } | |
483 } | |
484 | |
485 Token NextToken() { | |
486 while (ch == ' ' || | |
487 ch >= 9 && ch <= 10 || ch == 13 | |
488 ) NextCh(); | |
489 if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken(); | |
490 int recKind = noSym; | |
491 int recEnd = pos; | |
492 t = new Token(); | |
493 t.pos = pos; | |
494 t.col = col; | |
495 t.line = line; | |
496 t.charPos = charPos; | |
497 int state = start.state(ch); | |
498 tlen = 0; | |
499 AddCh(); | |
500 | |
501 loop: for (;;) { | |
502 switch (state) { | |
503 case -1: { | |
504 t.kind = eofSym; | |
505 break loop; | |
506 } // NextCh already done | |
507 case 0: { | |
508 if (recKind != noSym) { | |
509 tlen = recEnd - t.pos; | |
510 SetScannerBehindT(); | |
511 } | |
512 t.kind = recKind; | |
513 break loop; | |
514 } // NextCh already done | |
515 case 1: | |
516 recEnd = pos; recKind = 1; | |
517 if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {AddCh(); state = 1; break;} | |
518 else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} | |
519 case 2: | |
520 if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 2; break;} | |
521 else if (ch == '"') {AddCh(); state = 3; break;} | |
522 else {state = 0; break;} | |
523 case 3: | |
524 {t.kind = 2; break loop;} | |
525 case 4: | |
526 recEnd = pos; recKind = 3; | |
527 if (ch >= '0' && ch <= '9') {AddCh(); state = 4; break;} | |
528 else {t.kind = 3; break loop;} | |
529 case 5: | |
530 {t.kind = 3; break loop;} | |
531 case 6: | |
532 {t.kind = 5; break loop;} | |
533 case 7: | |
534 {t.kind = 6; break loop;} | |
535 case 8: | |
536 {t.kind = 8; break loop;} | |
537 case 9: | |
538 {t.kind = 9; break loop;} | |
539 case 10: | |
540 {t.kind = 11; break loop;} | |
541 case 11: | |
542 {t.kind = 16; break loop;} | |
543 case 12: | |
544 {t.kind = 17; break loop;} | |
545 case 13: | |
546 {t.kind = 18; break loop;} | |
547 case 14: | |
548 if (ch == '=') {AddCh(); state = 15; break;} | |
549 else {state = 0; break;} | |
550 case 15: | |
551 {t.kind = 19; break loop;} | |
552 case 16: | |
553 {t.kind = 20; break loop;} | |
554 case 17: | |
555 {t.kind = 21; break loop;} | |
556 case 18: | |
557 {t.kind = 22; break loop;} | |
558 case 19: | |
559 {t.kind = 23; break loop;} | |
560 case 20: | |
561 recEnd = pos; recKind = 10; | |
562 if (ch == '=') {AddCh(); state = 13; break;} | |
563 else {t.kind = 10; break loop;} | |
564 case 21: | |
565 recEnd = pos; recKind = 14; | |
566 if (ch == '=') {AddCh(); state = 11; break;} | |
567 else {t.kind = 14; break loop;} | |
568 case 22: | |
569 recEnd = pos; recKind = 15; | |
570 if (ch == '=') {AddCh(); state = 12; break;} | |
571 else {t.kind = 15; break loop;} | |
572 | |
573 } | |
574 } | |
575 t.val = new String(tval, 0, tlen); | |
576 return t; | |
577 } | |
578 | |
579 private void SetScannerBehindT() { | |
580 buffer.setPos(t.pos); | |
581 NextCh(); | |
582 line = t.line; | |
583 col = t.col; | |
584 charPos = t.charPos; | |
585 for (int i = 0; i < tlen; i++) | |
586 NextCh(); | |
587 } | |
588 | |
589 // get the next token (possibly a token already seen during peeking) | |
590 public Token Scan() { | |
591 if (tokens.next == null) { | |
592 return NextToken(); | |
593 } else { | |
594 pt = tokens = tokens.next; | |
595 return tokens; | |
596 } | |
597 } | |
598 | |
599 // get the next token, ignore pragmas | |
600 public Token Peek() { | |
601 do { | |
602 if (pt.next == null) { | |
603 pt.next = NextToken(); | |
604 } | |
605 pt = pt.next; | |
606 } while (pt.kind > maxT); // skip pragmas | |
607 | |
608 return pt; | |
609 } | |
610 | |
611 // make sure that peeking starts at current scan position | |
612 public void ResetPeek() { | |
613 pt = tokens; | |
614 } | |
615 | |
616 // The following methods are used for the CLNG Editor and will be called with java.Reflection. | |
617 // If the editor won't be used these 3 functions are obsolete, | |
618 // otherwise changes within the signature of the methods will result in Syntax Highlighting not working properly | |
619 // anymore. | |
620 | |
621 // get the offset of the next Token | |
622 public int getPeekTokenOffset() { | |
623 return pt.pos; | |
624 } | |
625 | |
626 // get the String value of the Token | |
627 public String getPeekTokenVal() { | |
628 return pt.val; | |
629 } | |
630 | |
631 // get the Kind value of the Token | |
632 public int getPeekTokenKind() { | |
633 return pt.kind; | |
634 } | |
635 | |
636 } // end Scanner |