# crawlers.py
  1. CRAWLERS_NAMES = {
  2. 'bing': 'Bingbot',
  3. 'google': 'Googlebot',
  4. 'yahoo': 'Yahoo! Slurp',
  5. 'yahooch': 'Yahoo! Slurp China',
  6. }
  7. CRAWLERS_AGENTS = {
  8. 'bingbot/': 'bing',
  9. 'Googlebot/': 'google',
  10. 'Yahoo! Slurp China': 'yahooch',
  11. 'Yahoo! Slurp': 'yahoo',
  12. }
  13. CRAWLERS_HOSTS = {
  14. }
  15. class Crawler(object):
  16. crawler = False
  17. host = None
  18. username = None
  19. def __init__(self, agent = None, ip = None):
  20. if agent is not None:
  21. for item in CRAWLERS_AGENTS.keys():
  22. if agent.find(item) != -1:
  23. self.crawler = True
  24. self.username = CRAWLERS_AGENTS[item]
  25. if ip is not None:
  26. for item in CRAWLERS_HOSTS.keys():
  27. if ip == item:
  28. self.crawler = True
  29. self.username = CRAWLERS_HOSTS[item]
  30. if self.crawler:
  31. self.username = CRAWLERS_NAMES[self.username]
  32. self.host = ip