Mercurial > hg > graal-compiler
comparison graal/com.oracle.truffle.sl/src/com/oracle/truffle/sl/parser/Scanner.frame @ 7292:213c1297a814
Simple Language: A simple dynamic programming language to demonstrate Truffle features
author | Christian Wimmer <christian.wimmer@oracle.com> |
---|---|
date | Fri, 21 Dec 2012 10:45:37 -0800 |
parents | |
children | dd1b2da27b38 |
comparison
equal
deleted
inserted
replaced
7291:a748e4d44694 | 7292:213c1297a814 |
---|---|
1 /*------------------------------------------------------------------------- | |
2 Compiler Generator Coco/R, | |
3 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz | |
4 extended by M. Loeberbauer & A. Woess, Univ. of Linz | |
5 ported from C# to Java by Wolfgang Ahorner | |
6 with improvements by Pat Terry, Rhodes University | |
7 | |
8 This program is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but | |
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along | |
19 with this program; if not, write to the Free Software Foundation, Inc., | |
20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
21 | |
22 As an exception, it is allowed to write an extension of Coco/R that is | |
23 used as a plugin in non-free software. | |
24 | |
25 If not otherwise stated, any source code generated by Coco/R (other than | |
26 Coco/R itself) does not fall under the GNU General Public License. | |
27 ------------------------------------------------------------------------*/ | |
28 -->begin | |
29 import java.io.InputStream; | |
30 import java.io.IOException; | |
31 import java.io.RandomAccessFile; | |
32 import java.util.Map; | |
33 import java.util.HashMap; | |
34 | |
35 // Checkstyle: stop | |
/**
 * A single token handed out by the scanner.
 *
 * <p>Plain mutable data holder: the scanner fills in the fields directly and
 * chains tokens through {@link #next}.
 */
class Token {

    /** Token kind. */
    public int kind;

    /** Token value. */
    public String val;

    /** Token position in bytes in the source text (starting at 0). */
    public int pos;

    /** Token position in characters in the source text (starting at 0). */
    public int charPos;

    /** Token line (starting at 1). */
    public int line;

    /** Token column (starting at 1). */
    public int col;

    /** Following token; peek tokens are kept in a linked list (ML 2005-03-11). */
    public Token next;
}
46 | |
47 // ----------------------------------------------------------------------------------- | |
48 // Buffer | |
49 // ----------------------------------------------------------------------------------- | |
50 class Buffer { | |
51 | |
52 // This Buffer supports the following cases: | |
53 // 1) seekable stream (file) | |
54 // a) whole stream in buffer | |
55 // b) part of stream in buffer | |
56 // 2) non seekable stream (network, console) | |
57 | |
58 public static final int EOF = Character.MAX_VALUE + 1; | |
59 private static final int MIN_BUFFER_LENGTH = 1024; // 1KB | |
60 private static final int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB | |
61 private byte[] buf; // input buffer | |
62 private int bufStart; // position of first byte in buffer relative to input stream | |
63 private int bufLen; // length of buffer | |
64 private int fileLen; // length of input stream (may change if stream is no file) | |
65 private int bufPos; // current position in buffer | |
66 private RandomAccessFile file; // input stream (seekable) | |
67 private InputStream stream; // growing input stream (e.g.: console, network) | |
68 | |
69 public Buffer(InputStream s) { | |
70 stream = s; | |
71 fileLen = bufLen = bufStart = bufPos = 0; | |
72 buf = new byte[MIN_BUFFER_LENGTH]; | |
73 } | |
74 | |
75 public Buffer(String fileName) { | |
76 try { | |
77 file = new RandomAccessFile(fileName, "r"); | |
78 fileLen = (int) file.length(); | |
79 bufLen = Math.min(fileLen, MAX_BUFFER_LENGTH); | |
80 buf = new byte[bufLen]; | |
81 bufStart = Integer.MAX_VALUE; // nothing in buffer so far | |
82 if (fileLen > 0) | |
83 setPos(0); // setup buffer to position 0 (start) | |
84 else | |
85 bufPos = 0; // index 0 is already after the file, thus setPos(0) is invalid | |
86 if (bufLen == fileLen) | |
87 Close(); | |
88 } catch (IOException e) { | |
89 throw new FatalError("Could not open file " + fileName); | |
90 } | |
91 } | |
92 | |
93 // don't use b after this call anymore | |
94 // called in UTF8Buffer constructor | |
95 protected Buffer(Buffer b) { | |
96 buf = b.buf; | |
97 bufStart = b.bufStart; | |
98 bufLen = b.bufLen; | |
99 fileLen = b.fileLen; | |
100 bufPos = b.bufPos; | |
101 file = b.file; | |
102 stream = b.stream; | |
103 // keep finalize from closing the file | |
104 b.file = null; | |
105 } | |
106 | |
107 @Override | |
108 protected void finalize() throws Throwable { | |
109 super.finalize(); | |
110 Close(); | |
111 } | |
112 | |
113 protected void Close() { | |
114 if (file != null) { | |
115 try { | |
116 file.close(); | |
117 file = null; | |
118 } catch (IOException e) { | |
119 throw new FatalError(e.getMessage()); | |
120 } | |
121 } | |
122 } | |
123 | |
124 public int Read() { | |
125 if (bufPos < bufLen) { | |
126 return buf[bufPos++] & 0xff; // mask out sign bits | |
127 } else if (getPos() < fileLen) { | |
128 setPos(getPos()); // shift buffer start to pos | |
129 return buf[bufPos++] & 0xff; // mask out sign bits | |
130 } else if (stream != null && ReadNextStreamChunk() > 0) { | |
131 return buf[bufPos++] & 0xff; // mask out sign bits | |
132 } else { | |
133 return EOF; | |
134 } | |
135 } | |
136 | |
137 public int Peek() { | |
138 int curPos = getPos(); | |
139 int ch = Read(); | |
140 setPos(curPos); | |
141 return ch; | |
142 } | |
143 | |
144 // beg .. begin, zero-based, inclusive, in byte | |
145 // end .. end, zero-based, exclusive, in byte | |
146 public String GetString(int beg, int end) { | |
147 int len = 0; | |
148 char[] buffer = new char[end - beg]; | |
149 int oldPos = getPos(); | |
150 setPos(beg); | |
151 while (getPos() < end) | |
152 buffer[len++] = (char) Read(); | |
153 setPos(oldPos); | |
154 return new String(buffer, 0, len); | |
155 } | |
156 | |
157 public int getPos() { | |
158 return bufPos + bufStart; | |
159 } | |
160 | |
161 public void setPos(int value) { | |
162 if (value >= fileLen && stream != null) { | |
163 // Wanted position is after buffer and the stream | |
164 // is not seek-able e.g. network or console, | |
165 // thus we have to read the stream manually till | |
166 // the wanted position is in sight. | |
167 while (value >= fileLen && ReadNextStreamChunk() > 0) { | |
168 // nothing to do... | |
169 } | |
170 } | |
171 | |
172 if (value < 0 || value > fileLen) { | |
173 throw new FatalError("buffer out of bounds access, position: " + value); | |
174 } | |
175 | |
176 if (value >= bufStart && value < bufStart + bufLen) { // already in buffer | |
177 bufPos = value - bufStart; | |
178 } else if (file != null) { // must be swapped in | |
179 try { | |
180 file.seek(value); | |
181 bufLen = file.read(buf); | |
182 bufStart = value; | |
183 bufPos = 0; | |
184 } catch (IOException e) { | |
185 throw new FatalError(e.getMessage()); | |
186 } | |
187 } else { | |
188 // set the position to the end of the file, Pos will return fileLen. | |
189 bufPos = fileLen - bufStart; | |
190 } | |
191 } | |
192 | |
193 // Read the next chunk of bytes from the stream, increases the buffer | |
194 // if needed and updates the fields fileLen and bufLen. | |
195 // Returns the number of bytes read. | |
196 private int ReadNextStreamChunk() { | |
197 int free = buf.length - bufLen; | |
198 if (free == 0) { | |
199 // in the case of a growing input stream | |
200 // we can neither seek in the stream, nor can we | |
201 // foresee the maximum length, thus we must adapt | |
202 // the buffer size on demand. | |
203 byte[] newBuf = new byte[bufLen * 2]; | |
204 System.arraycopy(buf, 0, newBuf, 0, bufLen); | |
205 buf = newBuf; | |
206 free = bufLen; | |
207 } | |
208 | |
209 int read; | |
210 try { | |
211 read = stream.read(buf, bufLen, free); | |
212 } catch (IOException ioex) { | |
213 throw new FatalError(ioex.getMessage()); | |
214 } | |
215 | |
216 if (read > 0) { | |
217 fileLen = bufLen = (bufLen + read); | |
218 return read; | |
219 } | |
220 // end of stream reached | |
221 return 0; | |
222 } | |
223 } | |
224 | |
225 // ----------------------------------------------------------------------------------- | |
226 // UTF8Buffer | |
227 // ----------------------------------------------------------------------------------- | |
228 class UTF8Buffer extends Buffer { | |
229 | |
230 UTF8Buffer(Buffer b) { | |
231 super(b); | |
232 } | |
233 | |
234 @Override | |
235 public int Read() { | |
236 int ch; | |
237 do { | |
238 ch = super.Read(); | |
239 // until we find a utf8 start (0xxxxxxx or 11xxxxxx) | |
240 } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); | |
241 if (ch < 128 || ch == EOF) { | |
242 // nothing to do, first 127 chars are the same in ascii and utf8 | |
243 // 0xxxxxxx or end of file character | |
244 } else if ((ch & 0xF0) == 0xF0) { | |
245 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
246 int c1 = ch & 0x07; | |
247 ch = super.Read(); | |
248 int c2 = ch & 0x3F; | |
249 ch = super.Read(); | |
250 int c3 = ch & 0x3F; | |
251 ch = super.Read(); | |
252 int c4 = ch & 0x3F; | |
253 ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; | |
254 } else if ((ch & 0xE0) == 0xE0) { | |
255 // 1110xxxx 10xxxxxx 10xxxxxx | |
256 int c1 = ch & 0x0F; | |
257 ch = super.Read(); | |
258 int c2 = ch & 0x3F; | |
259 ch = super.Read(); | |
260 int c3 = ch & 0x3F; | |
261 ch = (((c1 << 6) | c2) << 6) | c3; | |
262 } else if ((ch & 0xC0) == 0xC0) { | |
263 // 110xxxxx 10xxxxxx | |
264 int c1 = ch & 0x1F; | |
265 ch = super.Read(); | |
266 int c2 = ch & 0x3F; | |
267 ch = (c1 << 6) | c2; | |
268 } | |
269 return ch; | |
270 } | |
271 } | |
272 | |
273 // ----------------------------------------------------------------------------------- | |
274 // StartStates -- maps characters to start states of tokens | |
275 // ----------------------------------------------------------------------------------- | |
/**
 * Maps characters to start states of tokens.
 *
 * <p>Implemented as a small fixed-size hash table with chaining. Keys that
 * were never set map to state 0.
 */
class StartStates {

    /** Number of buckets; must stay a power of two so that INDEX_MASK works. */
    private static final int TABLE_SIZE = 128;
    private static final int INDEX_MASK = TABLE_SIZE - 1;

    /** One key/value entry in a bucket's chain. */
    private static final class Elem {

        final int key;
        final int val;
        final Elem next;

        Elem(int key, int val, Elem next) {
            this.key = key;
            this.val = val;
            this.next = next;
        }
    }

    private final Elem[] tab = new Elem[TABLE_SIZE];

    /**
     * Selects the bucket for a key. For non-negative keys this equals
     * {@code key % 128}; unlike the remainder it also yields a valid index for
     * negative keys.
     */
    private static int bucket(int key) {
        return key & INDEX_MASK;
    }

    /**
     * Associates a start state with a key. A later call for the same key
     * shadows earlier ones because new entries are put at the chain head.
     *
     * @param key the character (or other int key) to register
     * @param val the start state for that key
     */
    public void set(int key, int val) {
        int k = bucket(key);
        tab[k] = new Elem(key, val, tab[k]);
    }

    /**
     * Looks up the start state of a key.
     *
     * @param key the character (or other int key) to look up
     * @return the state most recently set for {@code key}, or 0 if none was set
     */
    public int state(int key) {
        for (Elem e = tab[bucket(key)]; e != null; e = e.next) {
            if (e.key == key) {
                return e.val;
            }
        }
        return 0;
    }
}
305 | |
306 // ----------------------------------------------------------------------------------- | |
307 // Scanner | |
308 // ----------------------------------------------------------------------------------- | |
/**
 * Scanner skeleton of the Coco/R frame file.
 *
 * <p>Lines starting with {@code -->} are frame markers, not Java. Presumably the
 * generator replaces each of them with code for the concrete grammar, so the
 * statements around a marker must keep their order. Names used here but not
 * declared in this skeleton ({@code noSym}, {@code maxT}) are presumably supplied
 * at the {@code -->declarations} marker - TODO confirm against the generator.
 *
 * <p>The scanner reads characters from a {@link Buffer}, produces {@link Token}
 * objects and keeps already peeked tokens in a linked list.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public class Scanner {

    static final char EOL = '\n';
    static final int eofSym = 0;
-->declarations

    public Buffer buffer; // scanner buffer

    Token t; // current token
    int ch; // current input character
    int pos; // byte position of current character
    int charPos; // position by unicode characters starting with 0
    int col; // column number of current character
    int line; // line number of current character
    int oldEols; // EOLs that appeared in a comment;
    static final StartStates start; // maps initial token character to start state
    static final Map literals; // maps literal strings to literal kinds

    Token tokens; // list of tokens already peeked (first token is a dummy)
    Token pt; // current peek token

    char[] tval = new char[16]; // token text used in NextToken(), dynamically enlarged
    int tlen; // length of current token

    // Creates the shared lookup tables; their contents come from the
    // -->initialization marker.
    static {
        start = new StartStates();
        literals = new HashMap();
-->initialization
    }

    /**
     * Creates a scanner that reads from the named file.
     *
     * @param fileName name of the file to scan
     */
    public Scanner(String fileName) {
        buffer = new Buffer(fileName);
        Init();
    }

    /**
     * Creates a scanner that reads from a stream.
     *
     * @param s the stream to scan
     */
    public Scanner(InputStream s) {
        buffer = new Buffer(s);
        Init();
    }

    /**
     * Resets the position counters, reads the first character and sets up the
     * dummy head of the token list.
     *
     * <p>If the input starts with 0xEF, the next two bytes must be 0xBB 0xBF
     * (the UTF-8 byte order mark); the buffer is then replaced by a
     * {@link UTF8Buffer} and the counters are reset again. Any other pair after
     * 0xEF raises a FatalError.
     */
    void Init() {
        pos = -1;
        line = 1;
        col = 0;
        charPos = -1;
        oldEols = 0;
        NextCh();
        if (ch == 0xEF) { // check optional byte order mark for UTF-8
            NextCh();
            int ch1 = ch;
            NextCh();
            int ch2 = ch;
            if (ch1 != 0xBB || ch2 != 0xBF) {
                throw new FatalError("Illegal byte order mark at start of file");
            }
            buffer = new UTF8Buffer(buffer);
            // the byte order mark does not count as a character or column
            col = 0;
            charPos = -1;
            NextCh();
        }
        pt = tokens = new Token(); // first token is a dummy
    }

    /**
     * Makes the next input character current in {@code ch}.
     *
     * <p>While {@code oldEols} is positive, an EOL is delivered and the counter
     * decremented without touching the buffer. Otherwise one character is read
     * and {@code pos}, {@code col}, {@code charPos} and {@code line} are updated.
     */
    void NextCh() {
        if (oldEols > 0) {
            ch = EOL;
            oldEols--;
        } else {
            pos = buffer.getPos();
            // buffer reads unicode chars, if UTF8 has been detected
            ch = buffer.Read();
            col++;
            charPos++;
            // replace isolated '\r' by '\n' in order to make
            // eol handling uniform across Windows, Unix and Mac
            if (ch == '\r' && buffer.Peek() != '\n')
                ch = EOL;
            if (ch == EOL) {
                line++;
                col = 0;
            }
        }
-->casing
    }

    /**
     * Makes sure {@code tval} has room for one more character (doubling it when
     * full) and, unless the end of the input has been reached, moves on to the
     * next character. The code at the {@code -->casing2} marker presumably
     * stores the current character into {@code tval} - TODO confirm.
     */
    void AddCh() {
        if (tlen >= tval.length) {
            char[] newBuf = new char[2 * tval.length];
            System.arraycopy(tval, 0, newBuf, 0, tval.length);
            tval = newBuf;
        }
        if (ch != Buffer.EOF) {
-->casing2
            NextCh();
        }
    }

-->comments

    /**
     * Looks up the text of the current token in {@code literals} and, if it is
     * registered there, replaces the token kind by the literal's kind.
     */
    void CheckLiteral() {
        String val = t.val;
-->casing3
        Object kind = literals.get(val);
        if (kind != null) {
            t.kind = ((Integer) kind).intValue();
        }
    }

    /**
     * Scans the next token from the input.
     *
     * <p>Skips blanks (plus whatever the {@code -->scan1} marker adds), records
     * the start position in a fresh token and then runs the state machine,
     * starting in the state that {@code start} assigns to the first character.
     * The grammar specific states come from the {@code -->scan3} marker.
     *
     * @return the scanned token with {@code val} set to the collected text
     */
    Token NextToken() {
        while (ch == ' ' ||
-->scan1
        ) NextCh();
-->scan2
        int recKind = noSym;
        int recEnd = pos;
        t = new Token();
        t.pos = pos;
        t.col = col;
        t.line = line;
        t.charPos = charPos;
        int state = start.state(ch);
        tlen = 0;
        AddCh();

        loop: for (;;) {
            switch (state) {
                case -1: {
                    t.kind = eofSym;
                    break loop;
                } // NextCh already done
                case 0: {
                    // If a token kind was already recorded, cut the token back
                    // to recEnd and reposition the scanner right behind it.
                    if (recKind != noSym) {
                        tlen = recEnd - t.pos;
                        SetScannerBehindT();
                    }
                    t.kind = recKind;
                    break loop;
                } // NextCh already done
-->scan3
            }
        }
        t.val = new String(tval, 0, tlen);
        return t;
    }

    /**
     * Repositions the scanner directly behind the first {@code tlen} characters
     * of the current token: seeks the buffer back to the token start, restores
     * line, column and character position from the token and re-reads
     * {@code tlen} characters.
     */
    private void SetScannerBehindT() {
        buffer.setPos(t.pos);
        NextCh();
        line = t.line;
        col = t.col;
        charPos = t.charPos;
        for (int i = 0; i < tlen; i++)
            NextCh();
    }

    // get the next token (possibly a token already seen during peeking)
    public Token Scan() {
        if (tokens.next == null) {
            return NextToken();
        } else {
            pt = tokens = tokens.next;
            return tokens;
        }
    }

    // get the next token, ignore pragmas
    public Token Peek() {
        do {
            // extend the list of peeked tokens on demand
            if (pt.next == null) {
                pt.next = NextToken();
            }
            pt = pt.next;
        } while (pt.kind > maxT); // skip pragmas

        return pt;
    }

    // make sure that peeking starts at current scan position
    public void ResetPeek() {
        pt = tokens;
    }

    // The following methods are used for the CLNG Editor and will be called via Java reflection.
    // If the editor is not used, these 3 methods are obsolete;
    // otherwise, changing their signatures will break the editor's syntax highlighting.

    // get the offset of the current peek token
    public int getPeekTokenOffset() {
        return pt.pos;
    }

    // get the String value of the current peek token
    public String getPeekTokenVal() {
        return pt.val;
    }

    // get the kind of the current peek token
    public int getPeekTokenKind() {
        return pt.kind;
    }

} // end Scanner