123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- import dopp;
// helper - reports whether `lexeme` is one of the language's reserved words
export bool isKeyword(string lexeme){
    // keywords recognized by the current grammar (previous draft also had
    // "if", "else", "while", "for", "return")
    switch(lexeme){
        case "dlang", "ptn", "ptns":
            return true;
        default:
            return false;
    }
}
// lexer - Tokenizer makes tokens from text.
// Produces a flat Token stream from `source`: keywords/identifiers, integers
// and floats (with `_` digit separators), "..." and `...` strings, single-line
// comments, newlines, and single-character Symbol tokens.
// `indent_type` and `indent_matter` are accepted for interface compatibility;
// the current implementation does not read them.
export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
    Token[] tokens;
    string str_helper = ""; // accumulates rewritten pieces of `..` strings that contain inner backticks
    int i = 0;
    while(i < source.length){

        if(source[i] == '\n'){ // new line
            tokens ~= Token(TokenType.New_Line, "\n");
            i++;


        }else if(source[i].isWhite){ // skip other whitespace
            i++;


        }else if(source[i].isAlpha || (source[i] == '_') ){ // identifier or keyword: alphabetic or underscore start
            auto start = i;
            while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
                i++;
            }
            string lexeme = source[start .. i];
            tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);


        }else if(source[i].isDigit){ // number
            auto start = i;
            while( (i < source.length) && (source[i].isDigit || (source[i] == '_') ) ){ // underscore can be inside number like 5_000 etc
                i++;
            }
            if( (i < source.length) && (source[i] == '.') ){ // include dot for float
                i++;
                while( (i < source.length) && source[i].isDigit){
                    i++;
                }
                tokens ~= Token(TokenType.Float, source[start .. i]);

            }else{
                tokens ~= Token(TokenType.Integer, source[start .. i]);
            }


        }else if( (source[i] == '/') && ( (i + 1) < source.length ) && (source[i + 1] == '/') ){ // single line comment "//" begins
            // fix: the old loop flipped `inside_string` instead of `inside_comment`
            // on '\n', so a comment swallowed the rest of the source and comment
            // detection stayed disabled afterwards.
            auto start = i;
            i += 2; // skip "//"
            while( (i < source.length) && (source[i] != '\n') ){ // comment runs to end of line
                i++;
            }
            tokens ~= Token(TokenType.Comment_Line, source[start .. i]); // lexeme excludes the newline
            if(i < source.length){ // consume the terminating newline, if any
                i++;
            }
            tokens ~= Token(TokenType.New_Line, "\n"); // comment always ends the logical line


        }else if(source[i] == '"'){ // Type string ".." begins
            auto start = i; // string begin position (includes opening quote)
            i++;
            bool closed = false;
            while( (i < source.length) && !closed ){
                if( (source[i] == '\\') && ( (i + 1) < source.length ) ){ // skip any escape pair
                    // fix: skipping only \" mis-lexed an escaped backslash before
                    // the closing quote (e.g. "a\\") and ran the string to EOF
                    i += 2;

                }else if(source[i] == '"'){ // close quote "
                    closed = true;

                }else{ // string not ended yet
                    i++;
                }
            }
            if(i < source.length){ // we count close quote
                i++;
            }
            tokens ~= Token(TokenType.String, source[start .. i]);


        }else if(source[i] == '`'){ // Type string `..` begins
            auto start = i; // string begin position (includes opening quote)
            i++;
            bool closed = false;
            while( (i < source.length) && !closed ){

                // ` cannot be escaped in `..` string - so we can add ~ "`" for it - because this is not string end
                if( (source[i] == '`') && ( (i + 1) < source.length ) && (source[i + 1] != ';') ){ // todo check for '\n' next line for new version lexer-compiler for syntax without ; in lines-commands ends
                    str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`"; // ` ~ "`" ~ ` -> ` after dlang compiling
                    i++;
                    start = i;

                }else if(source[i] == '`'){ // close quote `
                    closed = true;

                }else{ // string not ended yet
                    i++;
                }
            }
            if(i < source.length){ // we count close quote
                i++;
            }
            if(str_helper != ""){
                tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
                str_helper = "";
            }else{
                tokens ~= Token(TokenType.String, source[start .. i]);
            }
            // NOTE(review): a second branch with the identical `source[i] == '`'`
            // condition existed here; it was unreachable dead code (and scanned
            // for '"' instead of a backtick) and has been removed.


        }else{ // common symbols as tokens
            tokens ~= Token(TokenType.Symbol, source[i].to!string);
            i++;
        }
    }
    return tokens;
}
|