Browse Source

indents instead of curly braces

221V 1 week ago
parent
commit
b7dad275d5
3 changed files with 71 additions and 15 deletions
  1. 20 9
      source/dopp.d
  2. 41 4
      source/dopp_lexer.d
  3. 10 2
      source/dopp_parser.d

+ 20 - 9
source/dopp.d

@@ -4,13 +4,14 @@ import toml;
 public{
 public{
   import std.stdio : writeln;
   import std.stdio : writeln;
   import std.file : read;
   import std.file : read;
-  import std.array;
+  import std.array; // .array.idup + other
   import std.string;
   import std.string;
   import std.uni : isWhite, isAlpha, isAlphaNum;
   import std.uni : isWhite, isAlpha, isAlphaNum;
   import std.ascii : isDigit;
   import std.ascii : isDigit;
   import std.conv : to;
   import std.conv : to;
   import std.algorithm;
   import std.algorithm;
   import std.sumtype : SumType;
   import std.sumtype : SumType;
+  import std.range : repeat;
 }
 }
 
 
 import dopp_lexer : tokenize, isKeyword;
 import dopp_lexer : tokenize, isKeyword;
@@ -28,8 +29,11 @@ export enum TokenType{
   String,
   String,
   Symbol,
   Symbol,
   Whitespace, // maybe todo add equals and other (=, >=, <=, ==) - needs or not ?
   Whitespace, // maybe todo add equals and other (=, >=, <=, ==) - needs or not ?
+  Round_Bracket,
   Comment_Line,
   Comment_Line,
   New_Line,
   New_Line,
+  Indent_Incr,
+  Indent_Decr,
   Match_Any
   Match_Any
 }
 }
 
 
@@ -41,9 +45,14 @@ export struct Token{
   // for debug
   // for debug
   string toString(){
   string toString(){
     import std.format : format;
     import std.format : format;
+    import std.string : replace;
     //return "Token(type: %d, lexeme: %s)".format(type, lexeme);
     //return "Token(type: %d, lexeme: %s)".format(type, lexeme);
     //return "Token = type: %d, lexeme: %s\n".format(type, lexeme);
     //return "Token = type: %d, lexeme: %s\n".format(type, lexeme);
-    return "Token = type: %d, lexeme: %s\n".format(type, ( (TokenType.New_Line == type) ? `\n` : lexeme) );
+    return "Token = type: %d, lexeme: %s\n".format(type,
+      ( (type == TokenType.New_Line) ?
+          `\n` :
+          ( (type == TokenType.Indent_Incr) || (type == TokenType.Indent_Decr) ) ?
+            lexeme.replace("\n", "\\n") : lexeme ) );
   }
   }
 }
 }
 
 
@@ -202,7 +211,7 @@ int main(string[] argv){
   // writeln("valid_argv = ", valid_argv); // todo add config key-value "no_warn" = false | true for silence
   // writeln("valid_argv = ", valid_argv); // todo add config key-value "no_warn" = false | true for silence
   
   
   if(!valid_argv){
   if(!valid_argv){
-    return 0;
+    return 1;
   }
   }
   
   
   
   
@@ -231,11 +240,13 @@ int main(string[] argv){
   
   
   string source = q"[
   string source = q"[
 auto y1 = "this is test";
 auto y1 = "this is test";
-auto y2 = "this \"is\" test";
-auto y3 = `this "is" test`; // this is single line comment
+if(true)
+    auto y2 = "this \"is\" test";
+else
+    auto y3 = `this "is" test`; // this is single line comment
 auto y4 = `this is test`;
 auto y4 = `this is test`;
 auto y5 = `this `is` test`;
 auto y5 = `this `is` test`;
-  ]"; // "
+]"; // "
   
   
 /+
 /+
   string source = q"[
   string source = q"[
@@ -261,9 +272,9 @@ void main(){
   ]";
   ]";
   +/
   +/
   
   
-  auto tokens = tokenize(source, 0, 2); // indent_type = 0 = whitespace; indent_matter = 2 (whitespaces)
+  auto tokens = tokenize(source, 0, 2, 2); // indent_type = 0 = whitespace; indent_matter = 2 (whitespaces); indent_out = 2 (whitespaces in output-generated code)
   writeln(tokens);
   writeln(tokens);
-  auto result = parse(tokens); // indent_out = 2 (whitespaces in output-generated code)
+  auto result = parse(tokens);
   writeln(result);
   writeln(result);
   
   
   /+
   /+
@@ -282,7 +293,7 @@ void main(){
   +/
   +/
   
   
   
   
-  return 1;
+  return 0;
 }
 }
 
 
 
 

+ 41 - 4
source/dopp_lexer.d

@@ -11,8 +11,13 @@ export bool isKeyword(string lexeme){
 
 
 // lexer - Tokenizer makes tokens from text
 // lexer - Tokenizer makes tokens from text
 //export Token[] tokenize(string source){
 //export Token[] tokenize(string source){
-export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
+export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter, ubyte indent_out){
   Token[] tokens;
   Token[] tokens;
+  ubyte indent_lvl = 0; // current indent depth: +1 for each emitted open curly brace, -1 for each emitted close curly brace; should be 0 again at the end of the result
+  auto indent_symbol = ' ';
+  if(indent_type != 0){
+    indent_symbol = '\t';
+  }
   bool inside_string = false;
   bool inside_string = false;
   bool inside_comment = false;
   bool inside_comment = false;
   //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
   //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
@@ -21,14 +26,42 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
   while(i < source.length){
   while(i < source.length){
     
     
     if(source[i] == '\n'){ // new line
     if(source[i] == '\n'){ // new line
-      tokens ~= Token(TokenType.New_Line, "\n");
       i++;
       i++;
-    
+      
+      if( (i < source.length) &&
+          ( (source[i] == indent_symbol) || (indent_lvl > 0) ) ){
+        auto start = i;
+        while( (i < source.length) && (source[i] == indent_symbol) ){
+          i++;
+        }
+        auto indent_symbols_count = i - start;
+        //writeln("indent_symbols_count: ", indent_symbols_count);
+        auto maybe_new_indent_lvl = indent_symbols_count / indent_matter;
+        //writeln("maybe_new_indent_lvl: ", maybe_new_indent_lvl);
+        if(maybe_new_indent_lvl > indent_lvl){
+          indent_lvl++;
+          string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+          tokens ~= Token(TokenType.Indent_Incr, "{" ~ "\n" ~ new_indent_out);
+        
+        }else if(maybe_new_indent_lvl < indent_lvl){
+          indent_lvl--;
+          string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
+          
+          string maybe_new_line = "\n";
+          if( (i + 4) < source.length && source[i .. i + 4] == "else" ){ // the next token may be "else": then no new line is added after "}" // todo: maybe add a separate token type for else
+            maybe_new_line = "";
+          }
+          
+          tokens ~= Token(TokenType.Indent_Decr, "\n" ~ new_indent_out ~ "}" ~ maybe_new_line);
+        }
+      
+      }else if(i > 0){
+        tokens ~= Token(TokenType.New_Line, "\n");
+      }
     
     
     }else if(source[i].isWhite){ // skip whitespaces
     }else if(source[i].isWhite){ // skip whitespaces
       i++;
       i++;
     
     
-    
     }else if(source[i].isAlpha || (source[i] == '_') ){ // is unicode alphabetic character or underscore
     }else if(source[i].isAlpha || (source[i] == '_') ){ // is unicode alphabetic character or underscore
       auto start = i;
       auto start = i;
       while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
       while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
@@ -132,6 +165,10 @@ export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
       tokens ~= Token(TokenType.String, source[start .. i]);
       tokens ~= Token(TokenType.String, source[start .. i]);
     
     
     
     
+    }else if( (inside_string == false) && ( (source[i] == '(') || (source[i] == ')') ) ){ // round brackets
+      tokens ~= Token(TokenType.Round_Bracket, source[i].to!string);
+      i++;
+    
     }else{ // common symbols as tokens
     }else{ // common symbols as tokens
       tokens ~= Token(TokenType.Symbol, source[i].to!string);
       tokens ~= Token(TokenType.Symbol, source[i].to!string);
       i++;
       i++;

+ 10 - 2
source/dopp_parser.d

@@ -53,8 +53,16 @@ export string parse(Token[] tokens){
     }
     }
     +/
     +/
     
     
-    if( (tokens[i].type != TokenType.New_Line) && ( (i + 1) < tokens.length ) && (tokens[i + 1].lexeme != ";") && (tokens[i + 1].type != TokenType.New_Line) ){
-      result ~= tokens[i].lexeme ~ " "; // add whitespace between lexemes, but not after "\n" new_line & not after ";"
+    if( (tokens[i].type != TokenType.New_Line) &&
+        (tokens[i].type != TokenType.Indent_Incr) &&
+        (tokens[i].type != TokenType.Indent_Decr) &&
+        (tokens[i].type != TokenType.Round_Bracket) &&
+        ( (i + 1) < tokens.length ) && (tokens[i + 1].lexeme != ";") &&
+        (tokens[i + 1].type != TokenType.New_Line) &&
+        (tokens[i + 1].type != TokenType.Indent_Incr) &&
+        (tokens[i + 1].type != TokenType.Indent_Decr) &&
+        (tokens[i + 1].type != TokenType.Round_Bracket) ){
+      result ~= tokens[i].lexeme ~ " "; // add a whitespace after the lexeme, except when this token or the next one is a "\n" new_line, "{", "}", "(" or ")", or when the next one is ";"
     
     
     }else{
     }else{
       result ~= tokens[i].lexeme; // for just show all text
       result ~= tokens[i].lexeme; // for just show all text