# crawlers.py
  1. CRAWLERS_NAMES = {
  2. 'bing': 'Bingbot',
  3. 'google': 'Googlebot',
  4. 'yahoo': 'Yahoo! Slurp',
  5. 'yahooch': 'Yahoo! Slurp China',
  6. 'unidentified': 'Unidentified',
  7. }
  8. CRAWLERS_AGENTS = {
  9. 'bingbot/': 'bing',
  10. 'Googlebot/': 'google',
  11. 'Yahoo! Slurp China': 'yahooch',
  12. 'Yahoo! Slurp': 'yahoo',
  13. 'bot': 'unidentified',
  14. 'Bot': 'unidentified',
  15. 'BOT': 'unidentified',
  16. }
  17. CRAWLERS_HOSTS = {
  18. }
  19. class Crawler(object):
  20. crawler = False
  21. host = None
  22. username = None
  23. def __init__(self, agent = None, ip = None):
  24. if agent is not None:
  25. for item in CRAWLERS_AGENTS.keys():
  26. if agent.find(item) != -1:
  27. self.crawler = True
  28. self.username = CRAWLERS_AGENTS[item]
  29. break
  30. if ip is not None:
  31. for item in CRAWLERS_HOSTS.keys():
  32. if ip == item:
  33. self.crawler = True
  34. self.username = CRAWLERS_HOSTS[item]
  35. break
  36. if self.crawler:
  37. self.username = CRAWLERS_NAMES[self.username]
  38. self.host = ip