@@ -3,36 +3,48 @@ import dopp;

// helper - is the given lexeme a keyword?
export bool isKeyword(string lexeme){
-  static immutable keywords = ["if", "else", "while", "for", "return"];
+  //static immutable keywords = ["if", "else", "while", "for", "return"];
+  static immutable keywords = ["dlang", "ptn", "ptns"];
  return keywords.canFind(lexeme);
}
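+// usage sketch (assumes std.algorithm.canFind is available via `import dopp;`):
+//   assert(isKeyword("ptn"));
+//   assert(!isKeyword("if")); // "if" is no longer in the keyword list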


// lexer - the tokenizer turns source text into tokens
-export Token[] tokenize(string source){
+//export Token[] tokenize(string source){
+export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
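+  // note: indent_type and indent_matter are not used in this hunk; presumably they
+  // configure how indentation is handled (an assumption based on the parameter names)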
  Token[] tokens;
+  bool inside_string = false;
+  //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
+  string str_helper = ""; // collects the rewritten pieces of a `..` string that contains embedded backticks
  int i = 0;
  while(i < source.length){

-    if(source[i].isWhite){ // skip whitespaces
+    if(source[i] == '\n'){ // new line
+      tokens ~= Token(TokenType.New_Line, "\n");
      i++;

-    }else if(source[i].isAlpha || source[i] == '_'){
+
+    }else if(source[i].isWhite){ // skip other whitespace
+      i++;
+
+
+    }else if(source[i].isAlpha || (source[i] == '_') ){ // identifier or keyword: a Unicode letter or an underscore
      auto start = i;
-      while(i < source.length && (source[i].isAlphaNum || source[i] == '_')){
+      while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
        i++;
      }
      string lexeme = source[start .. i];
      tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);
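+      // example: with the keyword list above, "ptns" becomes a Keyword token
+      // and any other name such as "foo" becomes an Identifier token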

+
    }else if(source[i].isDigit){ // number
      auto start = i;
-      while(i < source.length && (source[i].isDigit || source[i] == '_')){ // underscore can be inside number like 5_000 etc
+      while( (i < source.length) && (source[i].isDigit || (source[i] == '_') ) ){ // underscores are allowed inside a number, e.g. 5_000
        i++;
      }
-      if(i < source.length && source[i] == '.'){ // include dot for float
+      if( (i < source.length) && (source[i] == '.') ){ // include dot for float
        i++;
-        while(i < source.length && source[i].isDigit){
+        while( (i < source.length) && source[i].isDigit){
          i++;
        }
        tokens ~= Token(TokenType.Float, source[start .. i]);
@@ -41,16 +53,68 @@ export Token[] tokenize(string source){
        tokens ~= Token(TokenType.Integer, source[start .. i]);
      }
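+      // example: the lexeme 5_000 is emitted as Token(TokenType.Integer, "5_000"),
+      // and 3.14 as Token(TokenType.Float, "3.14")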

-    }else if(source[i] == '"'){ // type string begins
+
+    }else if( (inside_string == false) && (source[i] == '"') ){ // double-quoted string ".." begins
+      auto start = i++; // string begin position
+      inside_string = true;
+
+      while( (i < source.length) && inside_string ){ // scan forward to the end of the string
+        if( (source[i] == '\\') && ( (i + 1) < source.length ) ){ // a backslash escapes the next character, so \" (or \\) does not end the string
+          i += 2; // skip the backslash and the escaped character
+
+        }else if(source[i] == '"'){ // close quote "
+          inside_string = false;
+
+        }else{ // still inside the string
+          i++;
+        }
+      }
+      if(i < source.length){ // consume the closing quote
+        i++;
+      }
+      tokens ~= Token(TokenType.String, source[start .. i]);
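+      // example: the source text "a\"b" is kept as one String token; the lexeme still
+      // contains the surrounding quotes and the backslash escape verbatim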
+
+
+    }else if( (inside_string == false) && (source[i] == '`') ){ // backtick string `..` begins
+      auto start = i++; // string begin position
+      inside_string = true;
+      while( (i < source.length) && inside_string ){ // scan forward to the end of the string
+
+        // ` cannot be escaped inside a `..` string, so an embedded ` is rewritten as ` ~ "`" ~ ` instead of ending the string
+        if( (source[i] == '`') && ( (i + 1) < source.length ) && (source[i + 1] != ';') ){ // todo: also check for '\n' once the lexer supports commands without a trailing ;
+          str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`"; // ` ~ "`" ~ ` evaluates back to ` once the emitted code is compiled by dlang
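+          // example: for the dopp source `ab`cd`; the embedded ` (not followed by ;) is rewritten,
+          // so the String token becomes `ab`~ "`" ~ `cd` which dlang compiles to the text ab`cd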
+          i++;
+          start = i;
+
+        }else if(source[i] == '`'){ // close quote `
+          inside_string = false;
+
+        }else{ // still inside the string
+          i++;
+        }
+      }
+      if(i < source.length){ // consume the closing quote
+        i++;
+      }
+      if(str_helper != ""){ // the string contained embedded backticks
+        tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
+        str_helper = "";
+      }else{
+        tokens ~= Token(TokenType.String, source[start .. i]);
+      }
+
+
-      auto start = i++; // string begin position
-      while(i < source.length && source[i] != '"'){ // goto type string end position
-        i++;
-      }
-      if(i < source.length){ // close quotes
-        i++;
-      }
-      tokens ~= Token(TokenType.String, source[start .. i]);

+
    }else{ // common symbols as tokens
      tokens ~= Token(TokenType.Symbol, source[i].to!string);
      i++;