@@ -11,8 +11,13 @@ export bool isKeyword(string lexeme){
// lexer - the tokenizer turns source text into tokens
//export Token[] tokenize(string source){
-export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
+export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter, ubyte indent_out){
	Token[] tokens;
+	ubyte indent_lvl = 0; // +1 per opened block ("{"), -1 per closed block ("}"); back to 0 at the end
+	auto indent_symbol = ' ';
+	if(indent_type != 0){
+		indent_symbol = '\t';
+	}
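+	// inferred semantics of the new parameters (from their use below, not stated elsewhere in this diff):
+	//   indent_type — 0 selects ' ' (spaces), anything else '\t' (tabs) as the indent symbol
+	//   indent_matter — how many indent symbols form one indentation level in the input
+	//   indent_out — how many indent symbols per level are re-emitted in the token text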
	bool inside_string = false;
	bool inside_comment = false;
	//ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
@@ -21,14 +26,42 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
	while(i < source.length){

		if(source[i] == '\n'){ // new line
-			tokens ~= Token(TokenType.New_Line, "\n");
			i++;
-
+
+			if( (i < source.length) &&
+				( (source[i] == indent_symbol) || (indent_lvl > 0) ) ){
+				auto start = i;
+				while( (i < source.length) && (source[i] == indent_symbol) ){
+					i++;
+				}
+				auto indent_symbols_count = i - start;
+				//writeln("indent_symbols_count: ", indent_symbols_count);
+				auto maybe_new_indent_lvl = indent_symbols_count / indent_matter;
+				//writeln("maybe_new_indent_lvl: ", maybe_new_indent_lvl);
+				// todo: a blank line inside a block currently counts as a full dedent
+
+				if(maybe_new_indent_lvl == indent_lvl){ // same level: keep the line break and re-emit the current indentation
+					tokens ~= Token(TokenType.New_Line, "\n" ~ indent_symbol.repeat(indent_lvl * indent_out).array.idup);
+				}
+				while(maybe_new_indent_lvl > indent_lvl){ // one "{" per newly opened level
+					indent_lvl++;
+					string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+					tokens ~= Token(TokenType.Indent_Incr, "{" ~ "\n" ~ new_indent_out);
+				}
+				while(maybe_new_indent_lvl < indent_lvl){ // one "}" per closed level; a single dedent may close several blocks
+					indent_lvl--;
+					string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+
+					string maybe_new_line = "\n";
+					// only the innermost "}" may share its line with a following "else" // maybe todo: add an else token
+					if( (indent_lvl == maybe_new_indent_lvl) && ((i + 4) <= source.length) && (source[i .. i + 4] == "else") ){
+						maybe_new_line = "";
+					}
+
+					tokens ~= Token(TokenType.Indent_Decr, "\n" ~ new_indent_out ~ "}" ~ maybe_new_line);
+				}
+
+			}else if(tokens.length > 0){ // no leading New_Line token before the first real token
+				tokens ~= Token(TokenType.New_Line, "\n");
+			}
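+			// worked example (tabs as the indent symbol, indent_matter == indent_out == 1):
+			// the input "if a\n\tx\ny" yields identifier tokens for "if" and "a",
+			// Indent_Incr "{\n\t", an identifier token for "x", Indent_Decr "\n}\n",
+			// and an identifier token for "y"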

		}else if(source[i].isWhite){ // skip whitespace
			i++;

-
		}else if(source[i].isAlpha || (source[i] == '_') ){ // a Unicode alphabetic character or an underscore
			auto start = i;
			while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
@@ -132,6 +165,10 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
			tokens ~= Token(TokenType.String, source[start .. i]);


+		}else if( (inside_string == false) && ( (source[i] == '(') || (source[i] == ')') ) ){ // round brackets
+			tokens ~= Token(TokenType.Round_Bracket, source[i].to!string);
+			i++;
+
		}else{ // common symbols as tokens
			tokens ~= Token(TokenType.Symbol, source[i].to!string);
			i++;
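
A minimal usage sketch of the new signature (a hypothetical driver, not part of this change; it assumes the lexer module from this diff is imported, and prints tokens whole since Token's field names are not shown in the hunks):

	import std.stdio : writeln;
	// plus an import of the lexer module from this diff (module name not shown here)

	void main(){
		// tabs as the indent symbol (indent_type != 0), one tab per input level,
		// one indent symbol per level in the re-emitted token text
		string source = "if a\n\tx\ny\n";
		Token[] tokens = tokenize(source, 1, 1, 1);
		foreach(token; tokens){
			writeln(token); // e.g. the Indent_Incr token carries the text "{\n\t"
		}
	}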