Browse Source

Detect unidentified crawlers. #43

Rafał Pitoń 12 years ago
parent
commit
ecf28336b3
1 changed file with 44 additions and 38 deletions
  1. 44 38
      misago/crawlers.py

+ 44 - 38
misago/crawlers.py

@@ -1,39 +1,45 @@
-CRAWLERS_NAMES = {
-    'bing': 'Bingbot',
-    'google': 'Googlebot',
-    'yahoo': 'Yahoo! Slurp',
-    'yahooch': 'Yahoo! Slurp China',
-}
-
-CRAWLERS_AGENTS = {
-    'bingbot/': 'bing',
-    'Googlebot/': 'google',
-    'Yahoo! Slurp China': 'yahooch',
-    'Yahoo! Slurp': 'yahoo',
-}
-
-CRAWLERS_HOSTS = {
-}
-
-
-class Crawler(object):
-    crawler = False
-    host = None
-    username = None
-    
-    def __init__(self, agent = None, ip = None):
-        if agent is not None:
-            for item in CRAWLERS_AGENTS.keys():
-            	if agent.find(item) != -1:
-                    self.crawler = True
-                    self.username = CRAWLERS_AGENTS[item]
-                    
-        if ip is not None:
-            for item in CRAWLERS_HOSTS.keys():
-            	if ip == item:
-                    self.crawler = True
-                    self.username = CRAWLERS_HOSTS[item]
-                    
-        if self.crawler:
-            self.username = CRAWLERS_NAMES[self.username]
# Display names of the known crawlers, keyed by internal crawler id.
CRAWLERS_NAMES = {
    'bing': 'Bingbot',
    'google': 'Googlebot',
    'yahoo': 'Yahoo! Slurp',
    'yahooch': 'Yahoo! Slurp China',
    'unidentified': 'Unidentified',
}

# Case-sensitive user-agent substrings mapped to crawler ids.
# Several keys overlap: 'bingbot/' and 'Googlebot/' both contain 'bot', and
# 'Yahoo! Slurp China' contains 'Yahoo! Slurp'. Crawler therefore tries the
# longest keys first so that the most specific entry wins.
CRAWLERS_AGENTS = {
    'bingbot/': 'bing',
    'Googlebot/': 'google',
    'Yahoo! Slurp China': 'yahooch',
    'Yahoo! Slurp': 'yahoo',
    'bot': 'unidentified',
    'Bot': 'unidentified',
    'BOT': 'unidentified',
}

# Exact client IPs mapped to crawler ids (currently empty).
CRAWLERS_HOSTS = {
}


class Crawler(object):
    """Classify a client as a crawler from its user agent and/or IP.

    Attributes:
        crawler: True when the user agent or the IP matched a known entry,
            False otherwise.
        username: display name taken from CRAWLERS_NAMES for a match,
            None otherwise.
        host: the ``ip`` argument when a match was found, None otherwise.
    """
    crawler = False
    host = None
    username = None

    def __init__(self, agent=None, ip=None):
        """Match ``agent`` and ``ip`` against the crawler tables.

        Args:
            agent: user-agent string, or None to skip user-agent matching.
            ip: client IP, or None to skip host matching.

        Raises:
            KeyError: if a matched crawler id has no entry in CRAWLERS_NAMES.
        """
        if agent is not None:
            # Dict iteration order is not guaranteed before Python 3.7, and
            # the generic 'bot' keys are substrings of the specific ones.
            # Trying the longest keys first makes the result independent of
            # dict order: 'bingbot/' is tested before 'bot'.
            for fragment in sorted(CRAWLERS_AGENTS, key=len, reverse=True):
                if fragment in agent:
                    self.crawler = True
                    self.username = CRAWLERS_AGENTS[fragment]
                    break

        # An exact IP match overrides whatever the user agent matched.
        if ip is not None and ip in CRAWLERS_HOSTS:
            self.crawler = True
            self.username = CRAWLERS_HOSTS[ip]

        if self.crawler:
            # Swap the internal crawler id for its display name.
            self.username = CRAWLERS_NAMES[self.username]
            self.host = ip