# englishcorpus.py

import os
import random
  3. PHRASES_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "phrases.txt")
  4. class EnglishCorpus:
  5. def __init__(self, phrases_file=PHRASES_FILE, min_length=None, max_length=None):
  6. self._countdown = 0
  7. self._previous = None
  8. self.phrases = []
  9. with open(phrases_file, "r") as f:
  10. for phrase in [l.strip() for l in f.readlines()]:
  11. if min_length and len(phrase) < min_length:
  12. continue
  13. if max_length and len(phrase) > max_length:
  14. continue
  15. self.phrases.append(phrase)
  16. def _countdown_to_shuffle(self):
  17. self._countdown -= 1
  18. if self._countdown < 0:
  19. self._countdown = random.randint(500, 1000)
  20. self.shuffle()
  21. def __len__(self):
  22. return len(self.phrases)
  23. def shuffle(self):
  24. random.shuffle(self.phrases)
  25. def random_sentence(self):
  26. self._countdown_to_shuffle()
  27. choice = None
  28. while not choice or choice == self._previous:
  29. choice = random.choice(self.phrases)
  30. self._previous = choice
  31. return choice
  32. def random_sentences(self, no):
  33. self._countdown_to_shuffle()
  34. max_no = len(self) - no - 1
  35. start = random.randint(0, max_no)
  36. sentences = self.phrases[start : (start + no)]
  37. random.shuffle(sentences)
  38. return sentences