Add one more rule to robot detection: more than ten 404 pages viewed
@@ -114,12 +114,16 @@ class IWLAPreAnalysisRobots(IPlugin):
                 self._setRobot(k, super_hit)
                 continue
 
+            not_found_pages = 0
             for hit in super_hit['requests']:
 # 3) /robots.txt read
                 if hit['extract_request']['http_uri'].endswith('/robots.txt'):
                     self._setRobot(k, super_hit)
                     break
 
+                if int(hit['status']) == 404:
+                    not_found_pages += 1
+
 # 4) Any referer for hits
                 if not hit['is_page'] and hit['http_referer']:
                     referers += 1
@@ -128,6 +132,11 @@ class IWLAPreAnalysisRobots(IPlugin):
                 self._setRobot(k, super_hit)
                 continue
 
+# 5) more than 10 404 pages
+            if not_found_pages > 10:
+                self._setRobot(k, super_hit)
+                continue
+
             if not super_hit['viewed_pages'] and \
                     (super_hit['viewed_hits'] and not referers):
                 self._setRobot(k, super_hit)
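For context, the new check counts 404 responses among a host's requests and flags the host as a robot once the count exceeds ten. A minimal standalone sketch of that logic is below; it assumes only that each request is a dict with a 'status' field, as super_hit['requests'] is used in the diff. The function name, parameter, and sample data are illustrative, not part of the plugin.

# Sketch of the "too many 404s" rule in isolation (illustrative names).
def looks_like_robot(requests, max_not_found=10):
    """Return True if more than `max_not_found` requests got a 404 status."""
    not_found_pages = 0
    for hit in requests:
        if int(hit['status']) == 404:
            not_found_pages += 1
            if not_found_pages > max_not_found:
                return True
    return False

# Example: a crawler probing nonexistent URLs trips the rule,
# a normal visitor with a single broken link does not.
probing_host = [{'status': '404'}] * 11
normal_host = [{'status': '200'}] * 5 + [{'status': '404'}]
print(looks_like_robot(probing_host))  # True
print(looks_like_robot(normal_host))   # False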