Filter robots with *bot* and *crawl* regular expressions
This commit is contained in:
parent
00ad08a201
commit
4c74a14037
|
@ -190,7 +190,7 @@ class IWLADisplayReferers(IPlugin):
|
||||||
|
|
||||||
# All key phrases in a file
|
# All key phrases in a file
|
||||||
if self.create_all_key_phrases:
|
if self.create_all_key_phrases:
|
||||||
title = createCurTitle(self.iwla, u'All Key Phrases')
|
title = createCurTitle(self.iwla, self.iwla._(u'All Key Phrases'))
|
||||||
|
|
||||||
filename = 'key_phrases.html'
|
filename = 'key_phrases.html'
|
||||||
path = self.iwla.getCurDisplayPath(filename)
|
path = self.iwla.getCurDisplayPath(filename)
|
||||||
|
|
|
@ -59,7 +59,8 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)
|
self.awstats_robots = map(lambda (x) : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)
|
||||||
|
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
|
||||||
|
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Basic rule to detect robots
|
# Basic rule to detect robots
|
||||||
|
@ -72,7 +73,11 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
referers = 0
|
referers = 0
|
||||||
|
|
||||||
first_page = super_hit['requests'][0]
|
first_page = super_hit['requests'][0]
|
||||||
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
|
|
||||||
|
if self.robot_re.match(first_page['http_user_agent']) or\
|
||||||
|
self.crawl_re.match(first_page['http_user_agent']):
|
||||||
|
super_hit['robot'] = 1
|
||||||
|
continue
|
||||||
|
|
||||||
for r in self.awstats_robots:
|
for r in self.awstats_robots:
|
||||||
if r.match(first_page['http_user_agent']):
|
if r.match(first_page['http_user_agent']):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user