iwla/hooks/pre_analysis/H002_robot.py

43 lines
1.0 KiB
Python
Raw Normal View History

2014-11-19 19:34:16 +01:00
# Basic rule to detect robots
def hook(hits):
    """Flag visitor entries that look like robots, modifying them in place.

    ``hits`` is a mapping whose values are per-visitor dicts ("super hits").
    Each value is read through the keys ``'robot'``, ``'viewed_hits'``,
    ``'viewed_pages'`` and ``'pages'``; every item of ``'pages'`` is read
    through ``['extract_request']['http_uri']``, ``'is_page'`` and
    ``'http_referer'``.

    Rules applied, in order, to every entry whose ``'robot'`` is still falsy:

    * ``'viewed_hits'`` is falsy           --> ``'robot'`` is set to 1
    * ``'viewed_pages'`` is falsy          --> ``'hit_only'`` is set to 1
    * a request for ``/robots.txt`` exists --> ``'robot'`` is set to 1
    * no non-page request has a referer    --> ``'robot'`` is set to 1

    Returns None; the result is the mutation of the dicts in ``hits``.
    """
    for super_hit in hits.values():
        # Already classified as a robot: leave the entry untouched.
        if super_hit['robot']:
            continue

        # No hit viewed at all --> robot
        if not super_hit['viewed_hits']:
            super_hit['robot'] = 1
            continue

        # Hits but no page viewed: remember it, then keep analysing.
        if not super_hit['viewed_pages']:
            super_hit['hit_only'] = 1

        is_robot = False
        referers = 0
        for hit in super_hit['pages']:
            # /robots.txt read --> robot, no need to look any further
            if hit['extract_request']['http_uri'] == '/robots.txt':
                is_robot = True
                break

            # Count non-page requests that carry a referer
            if not hit['is_page'] and hit['http_referer']:
                referers += 1

        # 'viewed_hits' is necessarily truthy here (the falsy case was
        # handled above), so the absence of any referer is enough to decide.
        if is_robot or not referers:
            super_hit['robot'] = 1