dopp_lexer.d

import dopp;
// Standard-library imports for the helpers used below; dopp may already
// publicly import these, in which case they are harmless duplicates.
import std.algorithm.searching : canFind;
import std.ascii : isWhite, isAlpha, isAlphaNum, isDigit;
import std.conv : to;

// helper - is the lexeme a language keyword?
export bool isKeyword(string lexeme){
    static immutable keywords = ["if", "else", "while", "for", "return"];
    return keywords.canFind(lexeme);
}
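
// A minimal sanity check for the helper; assumes only the standard
// unittest runner (dub test / dmd -unittest) and the code above.
unittest{
    assert(isKeyword("while"));
    assert(!isKeyword("whiles"));
    assert(!isKeyword("_if"));
}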

// lexer - tokenize turns source text into a flat Token array
export Token[] tokenize(string source){
    Token[] tokens;
    size_t i = 0; // size_t avoids a signed/unsigned mismatch with source.length
    while(i < source.length){
        if(source[i].isWhite){ // skip whitespace
            i++;
        }else if(source[i].isAlpha || source[i] == '_'){
            // identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
            auto start = i;
            while(i < source.length && (source[i].isAlphaNum || source[i] == '_')){
                i++;
            }
            string lexeme = source[start .. i];
            tokens ~= Token(lexeme.isKeyword ? TokenType.Keyword : TokenType.Identifier, lexeme);
        }else if(source[i].isDigit){
            auto start = i;
            while(i < source.length && source[i].isDigit){
                i++;
            }
            // only take the dot when a digit follows, so "1." or "1.foo"
            // lexes as Integer 1 plus a '.' symbol rather than Float "1."
            if(i + 1 < source.length && source[i] == '.' && source[i + 1].isDigit){
                i++; // consume the dot
                while(i < source.length && source[i].isDigit){
                    i++;
                }
                tokens ~= Token(TokenType.Float, source[start .. i]);
            }else{
                tokens ~= Token(TokenType.Integer, source[start .. i]);
            }
        }else if(source[i] == '"'){
            // string literal; escape sequences are not handled here
            auto start = i++;
            while(i < source.length && source[i] != '"'){
                i++;
            }
            if(i < source.length){ // consume the closing quote if present
                i++;
            }
            // an unterminated string simply runs to end of input
            tokens ~= Token(TokenType.String, source[start .. i]);
        }else{ // any other character becomes a one-character Symbol token
            tokens ~= Token(TokenType.Symbol, source[i].to!string);
            i++;
        }
    }
    return tokens;
}
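
// A quick end-to-end sketch of the tokenizer; it assumes Token exposes its
// type and lexeme as fields named `type` and `lexeme` (those names are not
// confirmed by this file and would come from the dopp module).
unittest{
    auto t = tokenize(`if x1 >= 3.14`);
    assert(t[0].type == TokenType.Keyword && t[0].lexeme == "if");
    assert(t[1].type == TokenType.Identifier && t[1].lexeme == "x1");
    assert(t[2].type == TokenType.Symbol && t[3].type == TokenType.Symbol);
    assert(t[4].type == TokenType.Float && t[4].lexeme == "3.14");

    auto s = tokenize(`"hi" 1.`);
    assert(s[0].type == TokenType.String && s[0].lexeme == `"hi"`);
    // with the digit-after-dot check, `1.` is an Integer followed by a Symbol
    assert(s[1].type == TokenType.Integer && s[2].type == TokenType.Symbol);
}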