Filter robots with *bot* and *crawl* regular expressions
@@ -59,7 +59,8 @@ class IWLAPreAnalysisRobots(IPlugin):

    def load(self):
        self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)

        self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
        self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
        return True

# Basic rule to detect robots
@@ -72,7 +73,11 @@ class IWLAPreAnalysisRobots(IPlugin):
            referers = 0

            first_page = super_hit['requests'][0]
            if not self.iwla.isValidForCurrentAnalysis(first_page): continue

            if self.robot_re.match(first_page['http_user_agent']) or\
                    self.crawl_re.match(first_page['http_user_agent']):
                super_hit['robot'] = 1
                continue

            for r in self.awstats_robots:
                if r.match(first_page['http_user_agent']):
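For context, the change boils down to treating any user agent containing "bot" or "crawl" (case-insensitive) as a robot, in addition to the awstats robot list. Below is a minimal standalone sketch of that detection logic; the looks_like_robot helper and the sample user-agent strings are illustrative assumptions, not code or data from the commit itself.

import re

# Same patterns as the commit: any user agent containing "bot" or "crawl",
# case-insensitive, is flagged as a robot.
robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)

def looks_like_robot(user_agent):
    """Return True when the user agent matches the *bot* or *crawl* patterns."""
    return bool(robot_re.match(user_agent) or crawl_re.match(user_agent))

if __name__ == '__main__':
    # Hypothetical sample user agents, for illustration only.
    samples = [
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'SomeCrawler/1.0',
        'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0',
    ]
    for ua in samples:
        print('%s -> %s' % (ua, looks_like_robot(ua)))

Note that because both patterns start with .*, re.match() effectively behaves as a substring test here; re.search(r'bot', ua, re.IGNORECASE) would be an equivalent alternative.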