Add one more rule to robot detection: more than ten 404 pages viewed
parent 4bc2c1ad4b
commit 68a67adecc
@@ -114,12 +114,16 @@ class IWLAPreAnalysisRobots(IPlugin):
                 self._setRobot(k, super_hit)
                 continue
 
+            not_found_pages = 0
             for hit in super_hit['requests']:
                 # 3) /robots.txt read
                 if hit['extract_request']['http_uri'].endswith('/robots.txt'):
                     self._setRobot(k, super_hit)
                     break
 
+                if int(hit['status']) == 404:
+                    not_found_pages += 1
+
                 # 4) Any referer for hits
                 if not hit['is_page'] and hit['http_referer']:
                     referers += 1
@@ -128,6 +132,11 @@ class IWLAPreAnalysisRobots(IPlugin):
                 self._setRobot(k, super_hit)
                 continue
 
+            # 5) more than 10 404 pages
+            if not_found_pages > 10:
+                self._setRobot(k, super_hit)
+                continue
+
             if not super_hit['viewed_pages'] and \
                (super_hit['viewed_hits'] and not referers):
                 self._setRobot(k, super_hit)
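Taken out of the plugin, the new rule is a simple counter over a visitor's requests. The sketch below is an illustrative reconstruction, not code from the repository: the shape of the super_hit dictionary is inferred from the diff, and the function name and threshold constant are hypothetical.

# Minimal sketch of rule 5, assuming the super_hit structure shown in
# the diff above. The function name and NOT_FOUND_THRESHOLD constant
# are made up for this example; the plugin itself calls
# self._setRobot(k, super_hit) inline instead.

NOT_FOUND_THRESHOLD = 10  # assumption: the diff hardcodes 10

def exceeds_404_limit(super_hit):
    """Return True when a visitor triggered more than ten 404 responses."""
    not_found_pages = 0
    for hit in super_hit['requests']:
        # Status appears to be stored as a string in the log structure,
        # hence the int() conversion, mirroring the diff.
        if int(hit['status']) == 404:
            not_found_pages += 1
    return not_found_pages > NOT_FOUND_THRESHOLD

# Example: a client probing eleven nonexistent URLs is flagged.
probing_visitor = {'requests': [{'status': '404'}] * 11}
assert exceeds_404_limit(probing_visitor)

The rationale appears to be that counting raw 404s is a cheap heuristic: a human visitor follows existing links and rarely accumulates more than a handful of 404s in one visit, while crawlers and scanners that guess URLs cross the threshold quickly.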