- import dopp; // provides Token and TokenType (assumed)
- import std.algorithm.searching : canFind;
- import std.array : array;
- import std.ascii : isAlpha, isAlphaNum, isDigit, isWhite;
- import std.conv : to;
- import std.range : repeat;
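- /// Returns true when `lexeme` is one of the dopp keywords.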
- export bool isKeyword(string lexeme){
-
- static immutable keywords = ["dlang", "ptn", "ptns"];
- return keywords.canFind(lexeme);
- }
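- /// Splits `source` into tokens. `indent_type` selects the indentation symbol
- /// (0 = spaces, anything else = tabs), `indent_matter` is how many indent
- /// symbols make up one level in the source, and `indent_out` is how many are
- /// emitted per level in the generated output.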
- export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter, ubyte indent_out){
- Token[] tokens;
- ubyte indent_lvl = 0;
- // Indentation symbol: spaces when indent_type is 0, tabs otherwise.
- auto indent_symbol = (indent_type != 0) ? '\t' : ' ';
- bool inside_string = false;
- bool inside_comment = false;
-
- string str_helper = "";
- int i = 0;
- while(i < source.length){
-
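- // A newline may change the indentation level: count the indent symbols at
- // the start of the next line and compare against the current level.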
- if(source[i] == '\n'){
- i++;
-
- if( (i < source.length) &&
- ( (source[i] == indent_symbol) || (indent_lvl > 0) ) ){
- auto start = i;
- while( (i < source.length) && (source[i] == indent_symbol) ){
- i++;
- }
- auto indent_symbols_count = i - start;
-
- auto maybe_new_indent_lvl = indent_symbols_count / indent_matter;
-
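- // Deeper indentation opens a brace, shallower indentation closes one;
- // only a single level of change is handled per source line.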
- if(maybe_new_indent_lvl > indent_lvl){
- indent_lvl++;
- string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
- tokens ~= Token(TokenType.Indent_Incr, "{" ~ "\n" ~ new_indent_out);
-
- }else if(maybe_new_indent_lvl < indent_lvl){
- indent_lvl--;
- string new_indent_out = indent_symbol.repeat(indent_lvl * indent_out).array.idup;
-
- // Keep a following `else` on the same line as the closing brace.
- string maybe_new_line = "\n";
- if( (i + 4) <= source.length && source[i .. i + 4] == "else" ){
- maybe_new_line = "";
- }
-
- tokens ~= Token(TokenType.Indent_Decr, "\n" ~ new_indent_out ~ "}" ~ maybe_new_line);
- }
-
- }else if(i > 0){
- tokens ~= Token(TokenType.New_Line, "\n");
- }
-
- }else if(source[i].isWhite){
- i++;
-
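- // Identifiers and keywords: a letter or '_' followed by letters, digits, or '_'.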
- }else if(source[i].isAlpha || (source[i] == '_') ){
- auto start = i;
- while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_') ) ){
- i++;
- }
- string lexeme = source[start .. i];
- tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);
-
-
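- // Numeric literals: digits with optional '_' separators, plus an optional
- // fractional part that turns the token into a Float.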
- }else if(source[i].isDigit){
- auto start = i;
- while( (i < source.length) && (source[i].isDigit || (source[i] == '_') ) ){
- i++;
- }
- if( (i < source.length) && (source[i] == '.') ){
- i++;
- while( (i < source.length) && source[i].isDigit){
- i++;
- }
- tokens ~= Token(TokenType.Float, source[start .. i]);
-
- }else{
- tokens ~= Token(TokenType.Integer, source[start .. i]);
- }
-
-
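- // Line comments: '//' up to, but not including, the end of the line.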
- }else if( (source[i] == '/') && (inside_string == false) && (inside_comment == false) && ( (i + 1) < source.length ) && (source[i + 1] == '/') ){
- auto start = i;
- inside_comment = true;
- i++;
-
- while( inside_comment && (i < source.length) ){
- if(source[i] == '\n'){
- inside_comment = false;
- }else{
- i++;
- }
- }
- if(inside_comment){ inside_comment = false; }
- tokens ~= Token(TokenType.Comment_Line, source[start .. i]);
-
-
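- // Double-quoted strings, with support for escaped quotes (\").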
- }else if( (inside_string == false) && (source[i] == '"') ){
- auto start = i++;
- inside_string = true;
-
- while( (i < source.length) && inside_string ){
- if( (source[i] == '\\') && ( (i + 1) < source.length ) && (source[i + 1] == '"') ){
- i += 2;
-
- }else if(source[i] == '"'){
- inside_string = false;
-
- }else{
- i++;
- }
- }
- if(i < source.length){
- i++;
- }
- tokens ~= Token(TokenType.String, source[start .. i]);
-
-
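- // Backtick (WYSIWYG) strings. A backtick not followed by ';' is treated as
- // embedded and rewritten as `...` ~ "`" ~ `...` so the emitted D stays valid;
- // a backtick followed by ';' (or at the end of input) closes the string.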
- }else if( (inside_string == false) && (source[i] == '`') ){
- auto start = i++;
- inside_string = true;
- while( (i < source.length) && inside_string ){
-
-
- if( (source[i] == '`') && ( (i + 1) < source.length ) && (source[i + 1] != ';') ){
- str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`";
- i++;
- start = i;
-
- }else if(source[i] == '`'){
- inside_string = false;
-
- }else{
- i++;
- }
- }
- if(i < source.length){
- i++;
- }
- if(str_helper != ""){
- tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
- str_helper = "";
- }else{
- tokens ~= Token(TokenType.String, source[start .. i]);
- }
-
-
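- // NOTE: this branch repeats the backtick condition above, so it is never
- // reached.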
- }else if( (inside_string == false) && (source[i] == '`') ){
- auto start = i++;
- while( (i < source.length) && (source[i] != '"') ){
- i++;
- }
- if(i < source.length){
- i++;
- }
- tokens ~= Token(TokenType.String, source[start .. i]);
-
-
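- // Parentheses get their own token type; any other character becomes a
- // generic Symbol token.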
- }else if( (inside_string == false) && ( (source[i] == '(') || (source[i] == ')') ) ){
- tokens ~= Token(TokenType.Round_Bracket, source[i].to!string);
- i++;
-
- }else{
- tokens ~= Token(TokenType.Symbol, source[i].to!string);
- i++;
- }
- }
- return tokens;
- }
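-
- // Illustrative usage sketch (not part of the original module): assumes the
- // Token type from dopp and example indentation settings (2 source spaces per
- // level, 4 output spaces per level).
- unittest{
- auto tokens = tokenize("x = 1", 0, 2, 4);
- assert(tokens.length == 3); // Identifier "x", Symbol "=", Integer "1"
- }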