Browse Source

one-line strings with escaped quotes

221V 4 weeks ago
parent
commit
d90fba5abb
2 changed files with 134 additions and 15 deletions
  1. 59 4
      source/dopp.d
  2. 75 11
      source/dopp_lexer.d

+ 59 - 4
source/dopp.d

@@ -27,7 +27,9 @@ export enum TokenType{
   Float,
   String,
   Symbol,
-  Whitespace
+  Whitespace, // maybe todo add equals and other (=, >=, <=, ==) - needs or not ?
+  New_Line,
+  Match_Any
 }
 
 
@@ -39,7 +41,8 @@ export struct Token{
   string toString(){
     import std.format : format;
     //return "Token(type: %d, lexeme: %s)".format(type, lexeme);
-    return "Token = type: %d, lexeme: %s\n".format(type, lexeme);
+    //return "Token = type: %d, lexeme: %s\n".format(type, lexeme);
+    return "Token = type: %d, lexeme: %s\n".format(type, ( (TokenType.New_Line == type) ? `\n` : lexeme) );
   }
 }
 
@@ -212,18 +215,70 @@ int main(string[] argv){
   ]";
   +/
   
+  //string str1 = "this is test"; // valid in dlang
+  //string str1 = "this \"is\" test"; // valid in dlang
+  //string str1 = "this `is` test"; // valid in dlang
+  //string str1 = `this "is" test`; // valid in dlang
+  // //string str1 = `this `is` test`; // not valid in dlang - but valid in my lexer-parser
+  //string str1 = `this ` ~ "`is`" ~ ` test`; // valid in dlang
+  // todo add to lexer r" .. ";
+  // todo add to lexer multiline q"[ .. ]";
+  // todo add to lexer //string str1 = x"48 65 6C 6C 6F"; // valid in dlang - hex string // Hello
+  //writeln(str1);
+  
+  //writeln("hey" ~ '\n' ~ "hello");
+  
+  string source = q"[
+auto y1 = "this is test";
+auto y2 = "this \"is\" test";
+auto y3 = `this "is" test`;
+auto y4 = `this is test`;
+auto y5 = `this `is` test`;
+  ]"; // "
+  
+/+
   string source = q"[
 auto x = 5;
 auto x2 = 5_001;
-auto y = "this is test";
+auto y1 = "this is test";
+auto y2 = "this \"is\" test";
+auto y3 = `this "is" test`;
 auto z = 1;
+  ]"; // "
+  +/
+  
+  /+
+  string source = q"[
+void main(){
+  auto x = 5;
+  auto y = ptn x
+    1 = 1
+    5 = 5 * 2
+    9 = 9 * 3
+    _ = x * 4
+}
   ]";
+  +/
   
-  auto tokens = tokenize(source);
+  auto tokens = tokenize(source, 0, 2); // indent_type = 0 = whitespace; indent_matter = 2 (whitespaces)
   writeln(tokens);
   auto result = parse(tokens); // indent_out = 2 (whitespaces in output-generated code)
   writeln(result);
   
+  /+
+void main(){
+  auto x = 5;
+  if(x == 1){
+    y = 1;
+  }else if(x == 5){
+    y = 5 * 2; // return x * 2;
+  }else if(x == 9){
+    y = 9 * 3; // return x * 3;
+  }else{ // other cases
+    y = x * 4;
+  }
+}
+  +/
   
   
   return 1;

+ 75 - 11
source/dopp_lexer.d

@@ -3,36 +3,48 @@ import dopp;
 
 // helper - is lexeme keyword
 export bool isKeyword(string lexeme){
-  static immutable keywords = ["if", "else", "while", "for", "return"];
+  //static immutable keywords = ["if", "else", "while", "for", "return"];
+  static immutable keywords = ["dlang", "ptn", "ptns"];
   return keywords.canFind(lexeme);
 }
 
 
 // lexer - Tokenizer makes tokens from text
-export Token[] tokenize(string source){
+//export Token[] tokenize(string source){
+export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
   Token[] tokens;
+  bool inside_string = false;
+  //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
+  string str_helper = "";
   int i = 0;
   while(i < source.length){
     
-    if(source[i].isWhite){ // skip whitespaces
+    if(source[i] == '\n'){ // new line
+      tokens ~= Token(TokenType.New_Line, "\n");
       i++;
     
-    }else if(source[i].isAlpha || source[i] == '_'){
+    
+    }else if(source[i].isWhite){ // skip whitespaces
+      i++;
+    
+    
+    }else if(source[i].isAlpha || (source[i] == '_') ){ // is unicode alphabetic character or underscore
       auto start = i;
-      while(i < source.length && (source[i].isAlphaNum || source[i] == '_')){
+      while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
         i++;
       }
       string lexeme = source[start .. i];
       tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);
     
+    
     }else if(source[i].isDigit){ // number
       auto start = i;
-      while(i < source.length && (source[i].isDigit || source[i] == '_')){ // underscore can be inside number like 5_000 etc
+      while( (i < source.length) && (source[i].isDigit || (source[i] == '_') ) ){ // underscore can be inside number like 5_000 etc
         i++;
       }
-      if(i < source.length && source[i] == '.'){ // include dot for float
+      if( (i < source.length) && (source[i] == '.') ){ // include dot for float
         i++;
-        while(i < source.length && source[i].isDigit){
+        while( (i < source.length) && source[i].isDigit){
           i++;
         }
         tokens ~= Token(TokenType.Float, source[start .. i]);
@@ -41,16 +53,68 @@ export Token[] tokenize(string source){
         tokens ~= Token(TokenType.Integer, source[start .. i]);
       }
     
-    }else if(source[i] == '"'){ // type string begins
+    
+    }else if( (inside_string == false) && (source[i] == '"') ){ // Type string ".." begins
+      auto start = i++; // string begin position
+      inside_string = true;
+      
+      while( (i < source.length) && inside_string ){ // goto Type string end position
+        if( (source[i] == '\\') && ( (i + 1) < source.length ) && (source[i + 1] == '"') ){ // escaped " is not string end
+          i += 2; // skip 2 symbols
+          
+        }else if(source[i] == '"'){ // close quote "
+          inside_string = false;
+        
+        }else{ // string not ends yet
+          i++;
+        }
+      }
+      if(i < source.length){ // we count close quote
+        i++;
+      }
+      tokens ~= Token(TokenType.String, source[start .. i]);
+    
+    
+    }else if( (inside_string == false) && (source[i] == '`') ){ // Type string `..` begins
+      auto start = i++; // string begin position
+      inside_string = true;
+      while( (i < source.length) && inside_string ){ // goto Type string end position
+        
+        // ` cannot be escaped in `..` string - so we can add ~ "`" for it - because this is not string end
+        if( (source[i] == '`') && ( (i + 1) < source.length ) && (source[i + 1] != ';') ){ // todo check for '\n' next line for new version lexer-compiler for syntax without ; in lines-commands ends
+          str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`"; // ` ~ "`" ~ ` -> ` after dlang compiling
+          i++;
+          start = i;
+          
+        }else if(source[i] == '`'){ // close quote `
+          inside_string = false;
+        
+        }else{ // string not ends yet
+          i++;
+        }
+      }
+      if(i < source.length){ // we count close quote
+        i++;
+      }
+      if(str_helper != ""){
+        tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
+        str_helper = "";
+      }else{
+        tokens ~= Token(TokenType.String, source[start .. i]);
+      }
+    
+    
+    }else if( (inside_string == false) && (source[i] == '`') ){ // Type string `..` begins
       auto start = i++; // string begin position
-      while(i < source.length && source[i] != '"'){ // goto type string end position
+      while( (i < source.length) && (source[i] != '"') ){ // goto Type string end position
         i++;
       }
-      if(i < source.length){ // close quotes
+      if(i < source.length){
         i++;
       }
       tokens ~= Token(TokenType.String, source[start .. i]);
     
+    
     }else{ // common symbols as tokens
       tokens ~= Token(TokenType.Symbol, source[i].to!string);
       i++;