import dopp;
// Phobos modules used by the helpers below; the dopp module may already
// publicly import some of these, in which case the explicit imports are redundant
import std.algorithm : canFind;
import std.uni : isWhite, isAlpha;
import std.ascii : isAlphaNum, isDigit;
import std.range : repeat;
import std.array : array;
import std.conv : to;

// helper - returns true if the lexeme is a keyword
export bool isKeyword(string lexeme){
    //static immutable keywords = ["if", "else", "while", "for", "return"];
    static immutable keywords = ["dlang", "ptn", "ptns"];
    return keywords.canFind(lexeme);
}
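
// minimal sanity check for the helper; the keyword list above currently holds only "dlang", "ptn" and "ptns"
unittest{
    assert(isKeyword("ptn"));
    assert(!isKeyword("while"));
}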
// lexer - Tokenizer makes tokens from text
//export Token[] tokenize(string source){
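// Parameter meaning, as inferred from the code below:
//   indent_type   - 0 = indentation with spaces, anything else = tabs
//   indent_matter - number of indent symbols per indentation level in the input (must be non-zero)
//   indent_out    - number of indent symbols emitted per level in the generated output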
export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter, ubyte indent_out){
    Token[] tokens;
    ubyte indent_lvl = 0; // +1 for every emitted opening brace, -1 for every closing brace; should be back to 0 at the end
    auto indent_symbol = ' ';
    if(indent_type != 0){
        indent_symbol = '\t';
    }
    bool inside_string = false;
    bool inside_comment = false;
    //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
    string str_helper = "";
    int i = 0;
    while(i < source.length){

        if(source[i] == '\n'){ // new line
            i++;

            if( (i < source.length) &&
                ( (source[i] == indent_symbol) || (indent_lvl > 0) ) ){
                auto start = i;
                while( (i < source.length) && (source[i] == indent_symbol) ){
                    i++;
                }
                auto indent_symbols_count = i - start;
                //writeln("indent_symbols_count: ", indent_symbols_count);
                auto maybe_new_indent_lvl = indent_symbols_count / indent_matter;
                //writeln("maybe_new_indent_lvl: ", maybe_new_indent_lvl);
                if(maybe_new_indent_lvl > indent_lvl){
                    indent_lvl++;
                    string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
                    tokens ~= Token(TokenType.Indent_Incr, "{" ~ "\n" ~ new_indent_out);

                }else if(maybe_new_indent_lvl < indent_lvl){
                    indent_lvl--;
                    string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;

                    string maybe_new_line = "\n";
                    if( (i + 4) <= source.length && source[i .. i + 4] == "else" ){ // the next token may be else (<= also catches "else" at the very end of the source) // maybe todo: add an else token
                        maybe_new_line = "";
                    }

                    tokens ~= Token(TokenType.Indent_Decr, "\n" ~ new_indent_out ~ "}" ~ maybe_new_line);
                }

            }else if(i > 0){
                tokens ~= Token(TokenType.New_Line, "\n");
            }

        }else if(source[i].isWhite){ // skip whitespaces
            i++;

        }else if(source[i].isAlpha || (source[i] == '_') ){ // is unicode alphabetic character or underscore
            auto start = i;
            while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
                i++;
            }
            string lexeme = source[start .. i];
            tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);

        }else if(source[i].isDigit){ // number
            auto start = i;
            while( (i < source.length) && (source[i].isDigit || (source[i] == '_') ) ){ // underscores are allowed inside numbers, e.g. 5_000
                i++;
            }
            if( (i < source.length) && (source[i] == '.') ){ // include the dot for a float
                i++;
                while( (i < source.length) && source[i].isDigit){
                    i++;
                }
                tokens ~= Token(TokenType.Float, source[start .. i]);

            }else{
                tokens ~= Token(TokenType.Integer, source[start .. i]);
            }

        }else if( (source[i] == '/') && (inside_string == false) && (inside_comment == false) && ( (i + 1) < source.length ) && (source[i + 1] == '/') ){ // single line comment "//" begins
            auto start = i;
            inside_comment = true;
            i++;

            while( inside_comment && (i < source.length) ){ // advance to the end of the single line comment
                if(source[i] == '\n'){ // a newline ends the single line comment
                    inside_comment = false;
                }else{ // comment continues
                    i++;
                }
            }
            inside_comment = false; // also reset if the source ended inside the comment
            tokens ~= Token(TokenType.Comment_Line, source[start .. i]);

        }else if( (inside_string == false) && (source[i] == '"') ){ // string of type ".." begins
            auto start = i++; // string begin position
            inside_string = true;

            while( (i < source.length) && inside_string ){ // advance to the string end position
                if( (source[i] == '\\') && ( (i + 1) < source.length ) && (source[i + 1] == '"') ){ // an escaped " is not the string end
                    i += 2; // skip both symbols

                }else if(source[i] == '"'){ // closing quote "
                    inside_string = false;

                }else{ // string continues
                    i++;
                }
            }
            if(i < source.length){ // include the closing quote
                i++;
            }
            tokens ~= Token(TokenType.String, source[start .. i]);

        }else if( (inside_string == false) && (source[i] == '`') ){ // string of type `..` begins
            auto start = i++; // string begin position
            inside_string = true;
            while( (i < source.length) && inside_string ){ // advance to the string end position

                // a backtick cannot be escaped inside a `..` string, so an embedded ` (one that does not end the command)
                // is spliced into the output as ~ "`" ~ instead of ending the string
                if( (source[i] == '`') && ( (i + 1) < source.length ) && (source[i + 1] != ';') ){ // todo: also check for '\n' once the lexer supports command ends without ';'
                    str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`"; // ` ~ "`" ~ ` -> ` after dlang compiling
                    i++;
                    start = i;

                }else if(source[i] == '`'){ // closing quote `
                    inside_string = false;

                }else{ // string continues
                    i++;
                }
            }
            if(i < source.length){ // include the closing quote
                i++;
            }
            if(str_helper != ""){
                tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
                str_helper = "";
            }else{
                tokens ~= Token(TokenType.String, source[start .. i]);
            }

        }else if( (inside_string == false) && ( (source[i] == '(') || (source[i] == ')') ) ){ // round brackets
            tokens ~= Token(TokenType.Round_Bracket, source[i].to!string);
            i++;

        }else{ // common symbols as tokens
            tokens ~= Token(TokenType.Symbol, source[i].to!string);
            i++;
        }
    }
    return tokens;
}
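
// Hedged usage sketch: tokenize a two-line snippet indented with two spaces per
// level (indent_type = 0, indent_matter = 2), emitting four symbols per level
// (indent_out = 4). Token and TokenType are assumed to come from the dopp module.
unittest{
    auto tokens = tokenize("ptn f\n  ret 5\n", 0, 2, 4);
    // expected, following the logic above: Keyword "ptn", Identifier "f",
    // Indent_Incr, Identifier "ret", Integer "5", New_Line
    assert(tokens.length == 6);
}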