// dopp_lexer.d

import dopp;

// helper - is the lexeme a dopp keyword?
export bool isKeyword(string lexeme){
    //static immutable keywords = ["if", "else", "while", "for", "return"];
    static immutable keywords = ["dlang", "ptn", "ptns"];
    return keywords.canFind(lexeme);
}
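
// Minimal usage sketch for isKeyword (assumes, as the function above already does,
// that `import dopp;` makes std.algorithm's canFind available).
unittest{
    assert(isKeyword("dlang"));
    assert(isKeyword("ptn"));
    assert(!isKeyword("while")); // regular D keywords are intentionally not in the list
}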

// lexer - the tokenizer turns source text into tokens
//export Token[] tokenize(string source){
export Token[] tokenize(string source, ubyte indent_type, ubyte indent_matter){
    Token[] tokens;
    bool inside_string = false;
    bool inside_comment = false;
    //ubyte inside_string_type = 0; // 0 = ".."; 1 = `..`; 2 = todo
    string str_helper = "";
    int i = 0;
    while(i < source.length){
        if(source[i] == '\n'){ // new line
            tokens ~= Token(TokenType.New_Line, "\n");
            i++;
        }else if(source[i].isWhite){ // skip other whitespace
            i++;
        }else if(source[i].isAlpha || (source[i] == '_')){ // a unicode alphabetic character or underscore starts an identifier/keyword
            auto start = i;
            while( (i < source.length) && (source[i].isAlphaNum || (source[i] == '_')) ){
                i++;
            }
            string lexeme = source[start .. i];
            tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);
        }else if(source[i].isDigit){ // number
            auto start = i;
            while( (i < source.length) && (source[i].isDigit || (source[i] == '_')) ){ // underscores are allowed inside numbers, e.g. 5_000
                i++;
            }
            if( (i < source.length) && (source[i] == '.') ){ // a dot makes it a float literal
                i++;
                while( (i < source.length) && source[i].isDigit ){
                    i++;
                }
                tokens ~= Token(TokenType.Float, source[start .. i]);
            }else{
                tokens ~= Token(TokenType.Integer, source[start .. i]);
            }
        }else if( (source[i] == '/') && (inside_string == false) && (inside_comment == false) && ((i + 1) < source.length) && (source[i + 1] == '/') ){ // single-line comment "//" begins
            auto start = i;
            inside_comment = true;
            i++;
            while( inside_comment && (i < source.length) ){ // advance to the end of the single-line comment
                if(source[i] == '\n'){ // a line break ends the single-line comment
                    inside_comment = false;
                }else{ // comment has not ended yet
                    i++;
                }
            }
            if(inside_comment){ inside_comment = false; } // comment ran to the end of the source
            tokens ~= Token(TokenType.Comment_Line, source[start .. i]);
        }else if( (inside_string == false) && (source[i] == '"') ){ // double-quoted string ".." begins
            auto start = i++; // position of the opening quote
            inside_string = true;
            while( (i < source.length) && inside_string ){ // advance to the end of the string
                if( (source[i] == '\\') && ((i + 1) < source.length) && (source[i + 1] == '"') ){ // an escaped " does not end the string
                    i += 2; // skip both characters
                }else if(source[i] == '"'){ // closing quote "
                    inside_string = false;
                }else{ // string has not ended yet
                    i++;
                }
            }
            if(i < source.length){ // include the closing quote
                i++;
            }
            tokens ~= Token(TokenType.String, source[start .. i]);
        }else if( (inside_string == false) && (source[i] == '`') ){ // backtick string `..` begins
            auto start = i++; // position of the opening backtick
            inside_string = true;
            while( (i < source.length) && inside_string ){ // advance to the end of the string
                // a ` cannot be escaped inside a `..` string, so an embedded ` is rewritten as ~ "`" ~ and does not end the string
                if( (source[i] == '`') && ((i + 1) < source.length) && (source[i + 1] != ';') ){ // todo: also check for '\n' once the lexer-compiler supports line/command ends without ;
                    str_helper ~= source[start .. i] ~ "`" ~ `~ "` ~ "`" ~ `" ~ ` ~ "`"; // the emitted ` ~ "`" ~ ` collapses to a single ` after dlang compiling
                    i++;
                    start = i;
                }else if(source[i] == '`'){ // closing backtick `
                    inside_string = false;
                }else{ // string has not ended yet
                    i++;
                }
            }
            if(i < source.length){ // include the closing backtick
                i++;
            }
            if(str_helper != ""){ // an embedded ` was rewritten above - prepend the collected parts
                tokens ~= Token(TokenType.String, str_helper ~ source[start .. i]);
                str_helper = "";
            }else{
                tokens ~= Token(TokenType.String, source[start .. i]);
            }
        }else if( (inside_string == false) && (source[i] == '`') ){ // note: unreachable - the same condition is already handled by the `..` branch above
            auto start = i++; // string begin position
            while( (i < source.length) && (source[i] != '"') ){ // advance to the end of the string
                i++;
            }
            if(i < source.length){ // include the closing quote
                i++;
            }
            tokens ~= Token(TokenType.String, source[start .. i]);
        }else{ // any other character becomes a single Symbol token
            tokens ~= Token(TokenType.Symbol, source[i].to!string);
            i++;
        }
    }
    return tokens;
}
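
// Minimal usage sketch for tokenize. Only token counts are asserted, because the
// field names of Token/TokenType are defined elsewhere in the dopp module; the
// indent_type/indent_matter arguments are passed as 0 since tokenize does not read them.
unittest{
    // identifier, symbol, integer with an underscore, symbol, newline
    assert(tokenize("x = 5_000;\n", 0, 0).length == 5);
    // one float token
    assert(tokenize("3.14", 0, 0).length == 1);
    // one single-line comment token plus the trailing newline token
    assert(tokenize("// hi\n", 0, 0).length == 2);
    // a double-quoted string is kept as one token, quotes included
    assert(tokenize(`"hi"`, 0, 0).length == 1);
}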