@@ -11,8 +11,13 @@ export bool isKeyword(string lexeme){
// lexer - the tokenizer turns source text into tokens
//export Token[] tokenize(string source){
-export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
+export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter, ubyte indent_out){
	Token[] tokens;
+	ubyte indent_lvl = 0; // +1 per opened block ("{"), -1 per closed block ("}"); back to 0 at the end
+	auto indent_symbol = ' ';
+	if(indent_type != 0){
+		indent_symbol = '\t';
+	}
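+	// inferred semantics of the new parameters (from their use below, not stated elsewhere in this diff):
+	//   indent_type — 0 selects ' ' (spaces), anything else '\t' (tabs) as the indent symbol
+	//   indent_matter — how many indent symbols form one indentation level in the input
+	//   indent_out — how many indent symbols per level are re-emitted in the token text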
	bool inside_string = false;
	bool inside_comment = false;
	//ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
@@ -21,14 +26,42 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
	while(i < source.length){

		if(source[i] == '\n'){ // new line
-			tokens ~= Token(TokenType.New_Line, "\n");
			i++;
-
+
+			if( (i < source.length) &&
+				( (source[i] == indent_symbol) || (indent_lvl > 0) ) ){
+				auto start = i;
+				while( (i < source.length) && (source[i] == indent_symbol) ){
+					i++;
+				}
+				auto indent_symbols_count = i - start;
+				//writeln("indent_symbols_count: ", indent_symbols_count);
+				auto maybe_new_indent_lvl = indent_symbols_count / indent_matter;
+				//writeln("maybe_new_indent_lvl: ", maybe_new_indent_lvl);
+				// todo: a blank line inside a block currently counts as a full dedent
+
+				if(maybe_new_indent_lvl == indent_lvl){ // same level: keep the line break and re-emit the current indentation
+					tokens ~= Token(TokenType.New_Line, "\n" ~ indent_symbol.repeat(indent_lvl * indent_out).array.idup);
+				}
+				while(maybe_new_indent_lvl > indent_lvl){ // one "{" per newly opened level
+					indent_lvl++;
+					string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+					tokens ~= Token(TokenType.Indent_Incr, "{" ~ "\n" ~ new_indent_out);
+				}
+				while(maybe_new_indent_lvl < indent_lvl){ // one "}" per closed level; a single dedent may close several blocks
+					indent_lvl--;
+					string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+
+					string maybe_new_line = "\n";
+					// only the innermost "}" may share its line with a following "else" // maybe todo: add an else token
+					if( (indent_lvl == maybe_new_indent_lvl) && ((i + 4) <= source.length) && (source[i .. i + 4] == "else") ){
+						maybe_new_line = "";
+					}
+
+					tokens ~= Token(TokenType.Indent_Decr, "\n" ~ new_indent_out ~ "}" ~ maybe_new_line);
+				}
+
+			}else if(tokens.length > 0){ // no leading New_Line token before the first real token
+				tokens ~= Token(TokenType.New_Line, "\n");
+			}
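+			// worked example (tabs as the indent symbol, indent_matter == indent_out == 1):
+			// the input "if a\n\tx\ny" yields identifier tokens for "if" and "a",
+			// Indent_Incr "{\n\t", an identifier token for "x", Indent_Decr "\n}\n",
+			// and an identifier token for "y"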

		}else if(source[i].isWhite){ // skip whitespace
			i++;

-
		}else if(source[i].isAlpha || (source[i] == '_') ){ // a Unicode alphabetic character or an underscore
			auto start = i;
			while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
@@ -132,6 +165,10 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
			tokens ~= Token(TokenType.String, source[start .. i]);


+		}else if( (inside_string == false) && ( (source[i] == '(') || (source[i] == ')') ) ){ // round brackets
+			tokens ~= Token(TokenType.Round_Bracket, source[i].to!string);
+			i++;
+
		}else{ // common symbols as tokens
			tokens ~= Token(TokenType.Symbol, source[i].to!string);
			i++;
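
A minimal usage sketch of the new signature (a hypothetical driver, not part of this change; it assumes the lexer module from this diff is imported, and prints tokens whole since Token's field names are not shown in the hunks):

	import std.stdio : writeln;
	// plus an import of the lexer module from this diff (module name not shown here)

	void main(){
		// tabs as the indent symbol (indent_type != 0), one tab per input level,
		// one indent symbol per level in the re-emitted token text
		string source = "if a\n\tx\ny\n";
		Token[] tokens = tokenize(source, 1, 1, 1);
		foreach(token; tokens){
			writeln(token); // e.g. the Indent_Incr token carries the text "{\n\t"
		}
	}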