ScriptStack 1.0.4
Loading...
Searching...
No Matches
Lexer.cs
Go to the documentation of this file.
1using System;
2using System.Collections.Generic;
3using System.Text;
4
6{
7
20 public class Lexer
21 {
22
23 #region Private Enumerated Types
24
/// <summary>
/// States of the hand-written scanner in <c>GetTokens</c>. Each value names
/// the construct whose first character(s) have already been consumed.
/// </summary>
private enum State
{
    None,           // between tokens
    Divide,         // seen '/'
    InlineComment,  // inside '// ...' up to the end of the line
    BlockComment,   // inside '/* ... */'
    Assign,         // seen '='
    Plus,           // seen '+'
    Minus,          // seen '-'
    Multiply,       // seen '*'; referenced by GetTokens, so it must be declared here
    Xor,            // seen '^'
    Modulo,         // seen '%'
    And,            // seen '&'
    Or,             // seen '|'
    Not,            // seen '!'
    Greater,        // seen '>'
    Less,           // seen '<'
    Identifier,     // inside an identifier or keyword; referenced by GetTokens
    String,         // inside a double-quoted string
    EscapeString,   // seen '\' inside a string
    Number,         // inside an integer literal
    Float,          // seen '.' inside a number
    Hex,            // seen 'x' inside a number
    Bin,            // declared, but GetTokens never enters this state
    Oct,            // declared, but GetTokens never enters this state
    Char,           // seen the opening single quote of a char literal
    BinaryNot       // seen '~'
}
53
54 #endregion
55
56 #region Private Variables
57
58 private List<string> lines;
59 private int line;
60 private int column;
61 private State state;
62
63 #endregion
64
65 #region Private Methods
66
/// <summary>
/// Raises a <c>LexerException</c> reporting <paramref name="ch"/> as unexpected,
/// together with the current line, column and the text of <c>lines[line]</c>.
/// </summary>
/// <param name="ch">The offending character.</param>
private void InvalidCharacter(char ch)
{
    string message = "Unerwartetes Zeichen '" + ch.ToString() + "'.\n";
    string sourceText = lines[line];

    throw new LexerException(message, line, column, sourceText);
}
71
/// <summary>
/// True once the read position has moved past the last stored line.
/// </summary>
private bool EndOfSource
{
    get
    {
        int lineCount = lines.Count;
        return lineCount <= line;
    }
}
76
/// <summary>
/// Returns the character at the current position and advances by one,
/// wrapping to column 0 of the next line when the end of a line is reached.
/// </summary>
/// <returns>The character that was read.</returns>
/// <exception cref="LexerException">The position is already past the last line.</exception>
private char ReadChar()
{
    if (EndOfSource)
        throw new LexerException("Das Ende des TokenStream wurde erreicht.");

    string text = lines[line];
    int index = column++;
    char result = text[index];

    // Stepping off the end of the line moves to the start of the next one.
    if (column >= text.Length)
    {
        line++;
        column = 0;
    }

    return result;
}
96
/// <summary>
/// Moves the read position back by one character, wrapping to the last
/// character of the previous line when the column would become negative.
/// </summary>
/// <exception cref="LexerException">The position is already at line 0, column 0.</exception>
private void UndoChar()
{
    bool atBeginning = line == 0 && column == 0;
    if (atBeginning)
        throw new LexerException("Der Anfang des TokenStream wurde erreicht.");

    column -= 1;
    if (column >= 0)
        return;

    // Stepped back over the start of the line: continue at the end of the previous one.
    line -= 1;
    column = lines[line].Length - 1;
}
115
116 #endregion
117
118 #region Public Methods
119
/// <summary>
/// Creates a lexer over the given source lines. Every line is copied and
/// terminated with "\r\n"; the caller's list is not modified.
/// </summary>
/// <param name="lines">The source text, one entry per line.</param>
/// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
public Lexer(List<string> lines)
{
    if (lines == null)
        throw new ArgumentNullException("lines");

    this.lines = new List<string>(lines.Count);

    // The loop variable is deliberately not called 'line', so that the
    // assignment below unambiguously targets the field.
    foreach (string sourceLine in lines)
        this.lines.Add(sourceLine + "\r\n");

    line = 0;
    column = 0;
    state = State.None;
}
135
/// <summary>
/// Scans the stored source lines from the beginning and returns the tokens found.
/// The read position and scanner state are reset on every call.
/// </summary>
/// <returns>The tokens in source order.</returns>
/// <exception cref="LexerException">
/// An unexpected character, an unsupported escape sequence, a string or char
/// literal containing a line break, a malformed char literal, an invalid hex
/// digit, or input that ends in the middle of a token.
/// </exception>
public List<Token> GetTokens()
{
    line = 0;
    column = 0;
    state = State.None;

    string lexeme = null;
    List<Token> tokenStream = new List<Token>();

    while (!EndOfSource)
    {
        // Captured before reading: ReadChar() may already advance 'line'.
        string currentLine = lines[line];
        char ch = ReadChar();

        switch (state)
        {
            case State.None:
                switch (ch)
                {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                        break;

                    case '(':
                        tokenStream.Add(new Token(TokenType.LeftParen, "(", line, column, currentLine));
                        break;
                    case ')':
                        tokenStream.Add(new Token(TokenType.RightParen, ")", line, column, currentLine));
                        break;
                    case '[':
                        tokenStream.Add(new Token(TokenType.LeftBracket, "[", line, column, currentLine));
                        break;
                    case ']':
                        tokenStream.Add(new Token(TokenType.RightBracket, "]", line, column, currentLine));
                        break;
                    case '{':
                        tokenStream.Add(new Token(TokenType.LeftBrace, "{", line, column, currentLine));
                        break;
                    case '}':
                        tokenStream.Add(new Token(TokenType.RightBrace, "}", line, column, currentLine));
                        break;
                    case '.':
                        tokenStream.Add(new Token(TokenType.Period, ".", line, column, currentLine));
                        break;
                    case ':':
                        tokenStream.Add(new Token(TokenType.Colon, ":", line, column, currentLine));
                        break;
                    case ',':
                        tokenStream.Add(new Token(TokenType.Comma, ",", line, column, currentLine));
                        break;
                    case ';':
                        tokenStream.Add(new Token(TokenType.SemiColon, ";", line, column, currentLine));
                        break;

                    // Operators that may continue with a second character are
                    // resolved in their own state on the next iteration.
                    case '=': state = State.Assign; break;
                    case '+': state = State.Plus; break;
                    case '-': state = State.Minus; break;
                    case '*': state = State.Multiply; break;
                    case '/': state = State.Divide; break;
                    case '%': state = State.Modulo; break;
                    case '^': state = State.Xor; break;
                    case '&': state = State.And; break;
                    case '|': state = State.Or; break;
                    case '!': state = State.Not; break;
                    case '>': state = State.Greater; break;
                    case '<': state = State.Less; break;
                    case '~': state = State.BinaryNot; break;

                    case '\"':
                        lexeme = "";
                        state = State.String;
                        break;
                    case '\'':
                        lexeme = "";
                        state = State.Char;
                        break;

                    default:
                        if (char.IsLetter(ch) || ch == '_')
                        {
                            state = State.Identifier;
                            lexeme = "" + ch;
                        }
                        else if (char.IsDigit(ch))
                        {
                            lexeme = "" + ch;
                            state = State.Number;
                        }
                        else
                            InvalidCharacter(ch);
                        break;
                }
                break;

            case State.BinaryNot:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignBinaryNot, "~=", line, column, currentLine));
                    state = State.None;
                }
                else
                    // Only "~=" is produced by this lexer. Reject anything else
                    // instead of silently skipping input until the next '='.
                    InvalidCharacter(ch);
                break;

            case State.Divide:
                switch (ch)
                {
                    case '/':
                        state = State.InlineComment;
                        break;
                    case '*':
                        state = State.BlockComment;
                        break;
                    case '=':
                        tokenStream.Add(new Token(TokenType.AssignDivide, "/=", line, column, currentLine));
                        state = State.None;
                        break;
                    default:
                        tokenStream.Add(new Token(TokenType.Divide, "/", line, column, currentLine));
                        UndoChar();
                        state = State.None;
                        break;
                }
                break;

            case State.InlineComment:
                // Skip everything up to the end of the line.
                if (ch == '\n')
                    state = State.None;
                break;

            case State.BlockComment:
                if (ch == '*')
                {
                    char next = ReadChar();

                    if (next == '/')
                        state = State.None;
                    else
                        // Put the look-ahead back so that "**/" still closes the comment.
                        UndoChar();
                }
                break;

            case State.Assign:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.Equal, "==", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Assign, "=", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Plus:
                if (ch == '+')
                {
                    tokenStream.Add(new Token(TokenType.Increment, "++", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignPlus, "+=", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Plus, "+", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Minus:
                if (ch == '-')
                {
                    tokenStream.Add(new Token(TokenType.Decrement, "--", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignMinus, "-=", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Minus, "-", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Multiply:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignMultiply, "*=", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Multiply, "*", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Xor:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignXor, "^=", line, column, currentLine));
                    state = State.None;
                }
                else
                    // Only "^=" is produced by this lexer. Reject anything else
                    // instead of silently skipping input until the next '='.
                    InvalidCharacter(ch);
                break;

            case State.Modulo:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignModulo, "%=", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Modulo, "%", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.And:
                if (ch == '&')
                {
                    tokenStream.Add(new Token(TokenType.And, "&&", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignBinaryAnd, "&=", line, column, currentLine));
                    state = State.None;
                }
                else
                    InvalidCharacter(ch);
                break;

            case State.Or:
                if (ch == '|')
                {
                    tokenStream.Add(new Token(TokenType.Or, "||", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.AssignBinaryOr, "|=", line, column, currentLine));
                    state = State.None;
                }
                else
                    InvalidCharacter(ch);
                break;

            case State.Not:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.NotEqual, "!=", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Not, "!", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Greater:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.GreaterEqual, ">=", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '>')
                {
                    tokenStream.Add(new Token(TokenType.ShiftRight, ">>", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Greater, ">", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Less:
                if (ch == '=')
                {
                    tokenStream.Add(new Token(TokenType.LessEqual, "<=", line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '<')
                {
                    tokenStream.Add(new Token(TokenType.ShiftLeft, "<<", line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    tokenStream.Add(new Token(TokenType.Less, "<", line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Identifier:
                if (char.IsLetterOrDigit(ch) || ch == '_')
                    lexeme += ch;
                else
                {
                    TokenType tokenType = ClassifyIdentifier(lexeme);

                    // Boolean literals carry a bool value; every other word keeps its text.
                    if (tokenType == TokenType.Boolean)
                        tokenStream.Add(new Token(tokenType, lexeme == "true", line, column, currentLine));
                    else
                        tokenStream.Add(new Token(tokenType, lexeme, line, column, currentLine));

                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Char:
            {
                // 'ch' is the first character after the opening quote. Collect the
                // raw literal text up to the closing quote. A backslash protects the
                // character that follows it, so '\'' and '\\' are read completely.
                bool escaped = false;

                while (escaped || ch != '\'')
                {
                    if (ch == '\r' || ch == '\n')
                        throw new LexerException("Ein 'Character' darf genau ein Zeichen lang sein - ausgenommen Steuerzeichen!", line, column, currentLine);

                    lexeme += ch;
                    escaped = !escaped && ch == '\\';
                    ch = ReadChar();
                }

                char c = DecodeCharLiteral(lexeme, currentLine);
                tokenStream.Add(new Token(TokenType.Char, c, line, column, currentLine));
                state = State.None;
                break;
            }

            case State.String:
                if (ch == '\"') // closing quote: the string is complete
                {
                    tokenStream.Add(new Token(TokenType.String, lexeme, line, column, currentLine));
                    state = State.None;
                }
                else if (ch == '\\') // start of an escape sequence
                {
                    state = State.EscapeString;
                }
                else if (ch == '\r' || ch == '\n') // strings may not span lines
                {
                    throw new LexerException("Ein String darf sich nicht auf mehrere Zeilen erstrecken.", line, column, currentLine);
                }
                else
                {
                    lexeme += ch;
                }
                break;

            case State.EscapeString:
                // Every supported escape returns to State.String because the
                // scanner is still inside the string literal.
                switch (ch)
                {
                    case '"': lexeme += '\"'; break;
                    case '\'': lexeme += '\''; break;
                    case '\\': lexeme += '\\'; break;
                    case 'n': lexeme += '\n'; break;
                    case 't': lexeme += '\t'; break;
                    case 'r': lexeme += '\r'; break;
                    case 'b': lexeme += '\b'; break;
                    case 'f': lexeme += '\f'; break;
                    default:
                        throw new LexerException("Das Escapezeichen '\\" + ch + "' kann in Strings nicht verarbeitet werden.", line, column, currentLine);
                }
                state = State.String;
                break;

            case State.Number:
                // Digits are collected as text and converted once the literal ends.
                // Convert.ToInt32 / int.Parse throw FormatException or
                // OverflowException for digits outside the radix or values
                // that do not fit into 32 bits.
                if (char.IsDigit(ch))
                    lexeme += ch;
                else if (ch == '.')
                {
                    lexeme += '.';
                    state = State.Float;
                }
                else if (ch == 'x')
                {
                    lexeme += ch;
                    state = State.Hex;
                }
                else if (ch == 'b') // binary suffix, e.g. 101b
                {
                    int intValue = Convert.ToInt32(lexeme, 2);
                    tokenStream.Add(new Token(TokenType.Integer, intValue, line, column, currentLine));
                    state = State.None;
                }
                else if (ch == 'o') // octal suffix, e.g. 17o
                {
                    int intValue = Convert.ToInt32(lexeme, 8);
                    tokenStream.Add(new Token(TokenType.Integer, intValue, line, column, currentLine));
                    state = State.None;
                }
                else
                {
                    int intValue = int.Parse(lexeme);
                    tokenStream.Add(new Token(TokenType.Integer, intValue, line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Float:
                if (char.IsDigit(ch))
                    lexeme += ch;
                else
                {
                    float floatValue = float.Parse(lexeme, System.Globalization.CultureInfo.InvariantCulture);
                    tokenStream.Add(new Token(TokenType.Float, floatValue, line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            case State.Hex:
                if (char.IsDigit(ch) || char.IsLetter(ch))
                {
                    bool isHexLetter = (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');

                    // Letters are only valid in the ranges a-f and A-F.
                    if (char.IsLetter(ch) && !isHexLetter)
                        throw new LexerException("Ein hexadezimaler Wert darf ausser Zahlen nur Buchstaben von 'a' - 'f' bzw. 'A' - 'F' enthalten.", line, column, currentLine);

                    lexeme += ch;
                }
                else
                {
                    int intValue = Convert.ToInt32(lexeme, 16);
                    tokenStream.Add(new Token(TokenType.Integer, intValue, line, column, currentLine));
                    UndoChar();
                    state = State.None;
                }
                break;

            default:
                throw new LexerException("Unbekannter Lexer Status '" + state + "'.");
        }
    }

    // Every line ends with "\r\n", so a complete token always returns to State.None.
    if (state != State.None)
        throw new LexerException("Unerwartetes Ende des TokenStream.");

    return tokenStream;
}

/// <summary>
/// Maps a scanned word to its keyword token type, or to
/// <c>TokenType.Identifier</c> when it is not a keyword.
/// </summary>
private static TokenType ClassifyIdentifier(string lexeme)
{
    switch (lexeme)
    {
        case "null": return TokenType.Null;
        case "true":
        case "false": return TokenType.Boolean;
        case "if": return TokenType.If;
        case "else": return TokenType.Else;
        case "while": return TokenType.While;
        case "for": return TokenType.For;
        case "foreach": return TokenType.Foreach;
        case "in": return TokenType.In;
        case "switch": return TokenType.Switch;
        case "case": return TokenType.Case;
        case "default": return TokenType.Default;
        case "break": return TokenType.Break;
        case "continue": return TokenType.Continue;
        case "function": return TokenType.Function;
        case "return": return TokenType.Return;
        case "shared": return TokenType.Shared;
        case "var": return TokenType.Var;
        case "volatile": return TokenType.Volatile;
        case "struct": return TokenType.Struct;
        case "enum": return TokenType.Enum;
        case "include": return TokenType.Include;
        case "lock": return TokenType.Lock;
        case "run": return TokenType.Run;
        case "yield": return TokenType.Yield;
        case "notify": return TokenType.Notify;
        case "wait": return TokenType.Wait;
        default: return TokenType.Identifier;
    }
}

/// <summary>
/// Converts the raw text between the quotes of a char literal into its value:
/// one of the supported backslash escapes, or exactly one plain character.
/// </summary>
/// <exception cref="LexerException">The text is empty, too long, or an unknown escape.</exception>
private char DecodeCharLiteral(string lexeme, string currentLine)
{
    switch (lexeme)
    {
        case "\\n": return '\n';
        case "\\t": return '\t';
        case "\\b": return '\b';
        case "\\r": return '\r';
        case "\\f": return '\f';
        case "\\\'": return '\'';
        case "\\\"": return '\"';
        case "\\\\": return '\\';
    }

    if (lexeme.Length == 1)
        return lexeme[0];

    throw new LexerException("Ein 'Character' darf genau ein Zeichen lang sein - ausgenommen Steuerzeichen!", line, column, currentLine);
}
751
752 #endregion
753
754 }
755
756}
Lexer(List< string > lines)
Definition Lexer.cs:120
List< Token > GetTokens()
Definition Lexer.cs:140
A lexical token or simply token is a string with an assigned and thus identified meaning.
Definition Token.cs:100
TokenType
Known types of Token.
Definition Token.cs:12