Filter robots with *bot* and *crawl* regular expressions
This commit is contained in:
parent
00ad08a201
commit
4c74a14037
|
@ -190,7 +190,7 @@ class IWLADisplayReferers(IPlugin):
|
|||
|
||||
# All key phrases in a file
|
||||
if self.create_all_key_phrases:
|
||||
title = createCurTitle(self.iwla, u'All Key Phrases')
|
||||
title = createCurTitle(self.iwla, self.iwla._(u'All Key Phrases'))
|
||||
|
||||
filename = 'key_phrases.html'
|
||||
path = self.iwla.getCurDisplayPath(filename)
|
||||
|
|
|
@ -59,7 +59,8 @@ class IWLAPreAnalysisRobots(IPlugin):
|
|||
|
||||
def load(self):
    """Compile the regular expressions used to recognise robot user agents.

    Sets three attributes on the plugin:

    * ``awstats_robots`` -- a list with one case-insensitive pattern per
      entry of ``awstats_data.robots``.
    * ``robot_re`` -- matches any string containing "bot".
    * ``crawl_re`` -- matches any string containing "crawl".

    Every pattern is wrapped in ``.*`` on both sides because the patterns
    are applied with ``match()``, which anchors at the start of the string.

    Returns True unconditionally.
    """
    # A list comprehension replaces map(lambda (x): ...): the parenthesised
    # lambda parameter is a syntax error on Python 3, and a lazy map object
    # would be exhausted after the first loop over it. Entries are
    # interpolated unescaped, exactly as before.
    self.awstats_robots = [
        re.compile('.*%s.*' % robot, re.IGNORECASE)
        for robot in awstats_data.robots
    ]

    # Generic catch-all rules for user agents that announce themselves.
    self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
    self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)

    return True
|
||||
|
||||
# Basic rule to detect robots
|
||||
|
@ -72,7 +73,11 @@ class IWLAPreAnalysisRobots(IPlugin):
|
|||
referers = 0
|
||||
|
||||
first_page = super_hit['requests'][0]
|
||||
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
|
||||
|
||||
if self.robot_re.match(first_page['http_user_agent']) or\
|
||||
self.crawl_re.match(first_page['http_user_agent']):
|
||||
super_hit['robot'] = 1
|
||||
continue
|
||||
|
||||
for r in self.awstats_robots:
|
||||
if r.match(first_page['http_user_agent']):
|
||||
|
|
Loading…
Reference in New Issue
Block a user