iwla/hooks/pre_analysis/H001_robot.py
2014-11-19 19:34:16 +01:00

40 lines
935 B
Python

# Basic rule to detect robots
def hook(hits):
for k in hits.keys():
super_hit = hits[k]
if super_hit['robot']: continue
isRobot = False
referers = 0
# 1) no pages view --> robot
if not super_hit['viewed_pages']:
super_hit['robot'] = 1
continue
# 2) pages without hit --> robot
if not super_hit['viewed_hits']:
super_hit['robot'] = 1
continue
for hit in super_hit['pages']:
# 3) /robots.txt read
if hit['extract_request']['http_uri'] == '/robots.txt':
isRobot = True
break
# 4) Any referer for hits
if not hit['is_page'] and hit['http_referer']:
referers += 1
if isRobot:
super_hit['robot'] = 1
continue
if super_hit['viewed_hits'] and not referers:
super_hit['robot'] = 1
continue