Initial commit
This commit is contained in:
39
plugins/hooks_pre/H001_robot.py
Normal file
39
plugins/hooks_pre/H001_robot.py
Normal file
@@ -0,0 +1,39 @@
|
||||
|
||||
# Basic rule to detect robots
|
||||
|
||||
def hook(hits):
    """Flag visits that look like robots by setting ``super_hit['robot'] = 1``.

    ``hits`` is a mapping whose values are per-visit dicts (the keys are not
    inspected).  Each visit dict is read through the keys ``'robot'``,
    ``'viewed_pages'``, ``'viewed_hits'`` and ``'pages'``; each entry of
    ``'pages'`` is read through ``'extract_request'['http_uri']``,
    ``'is_page'`` and ``'http_referer'``.

    Visits whose ``'robot'`` value is already truthy are left untouched.
    A remaining visit is flagged when any of these rules matches:

    1. it has no viewed pages;
    2. it has no viewed hits;
    3. one of its requests targets ``/robots.txt``;
    4. none of its non-page requests carries a referer.

    The mapping is modified in place; nothing is returned.
    """
    for super_hit in hits.values():
        if super_hit['robot']:
            continue

        # 1) no pages view --> robot
        # 2) pages without hit --> robot
        if not super_hit['viewed_pages'] or not super_hit['viewed_hits']:
            super_hit['robot'] = 1
            continue

        is_robot = False
        referers = 0

        for hit in super_hit['pages']:
            # 3) /robots.txt read
            if hit['extract_request']['http_uri'] == '/robots.txt':
                is_robot = True
                break

            # 4) Any referer for hits
            if not hit['is_page'] and hit['http_referer']:
                referers += 1

        # 'viewed_hits' is known to be truthy here (rule 2 already filtered
        # the empty case), so only the referer count needs checking.
        if is_robot or not referers:
            super_hit['robot'] = 1
|
19
plugins/hooks_pre/H002_soutade.py
Normal file
19
plugins/hooks_pre/H002_soutade.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import re
|
||||
|
||||
# Remove logo from indefero
|
||||
logo_re = re.compile(r'^.+/logo/$')
|
||||
|
||||
# Count indefero logo requests as hits instead of page views
|
||||
|
||||
def hook(hits):
    """Reclassify requests matching ``logo_re`` from pages to plain hits.

    For every visit in ``hits`` whose ``'robot'`` value is falsy, each entry
    of ``'pages'`` that is currently marked ``'is_page'`` and whose
    ``'extract_request'['extract_uri']`` matches ``logo_re`` gets
    ``'is_page'`` set to ``False``; the visit's ``'viewed_pages'`` counter is
    decremented and its ``'viewed_hits'`` counter incremented accordingly.

    The mapping is modified in place; nothing is returned.
    """
    for visit in hits.values():
        if visit['robot']:
            continue

        for entry in visit['pages']:
            # Only entries still counted as pages can be demoted.
            is_logo_page = entry['is_page'] and logo_re.match(
                entry['extract_request']['extract_uri'])
            if not is_logo_page:
                continue

            entry['is_page'] = False
            visit['viewed_pages'] -= 1
            visit['viewed_hits'] += 1
|
Reference in New Issue
Block a user