# Basic rule to detect robots
def hook(hits):
    """Flag visitors in *hits* that look like robots (mutates in place).

    Each value of *hits* (a "super_hit" dict) is marked ``robot = 1`` when:
      * it has no viewed hits at all, or
      * one of its requests fetched ``/robots.txt``, or
      * it has viewed hits but none of its non-page hits carried a referer.
    A visitor with hits but no viewed pages is additionally tagged
    ``hit_only = 1``.

    Parameters:
        hits -- mapping of visitor key -> super_hit dict. Modified in place;
                nothing is returned.
    """
    # The keys are never needed, only the per-visitor dicts.
    for super_hit in hits.values():
        # Already classified as a robot: nothing more to do.
        if super_hit['robot']:
            continue

        is_robot = False
        referers = 0

        # 1) no pages view --> robot  (rule currently disabled)
        # if not super_hit['viewed_pages']:
        #     super_hit['robot'] = 1
        #     continue

        # 2) pages without hit --> robot
        if not super_hit['viewed_hits']:
            super_hit['robot'] = 1
            continue
        elif not super_hit['viewed_pages']:
            # Hit only
            super_hit['hit_only'] = 1

        for hit in super_hit['pages']:
            # 3) /robots.txt read
            if hit['extract_request']['http_uri'] == '/robots.txt':
                is_robot = True
                break

            # 4) Any referer for hits
            if not hit['is_page'] and hit['http_referer']:
                referers += 1

        if is_robot:
            super_hit['robot'] = 1
            continue

        # No referer on any plain (non-page) hit --> likely a robot.
        if super_hit['viewed_hits'] and not referers:
            super_hit['robot'] = 1
            continue