Find robot name in 'compatible' string and group them
This commit is contained in:
parent
9c57ad3ece
commit
ac246eabe2
|
@ -65,11 +65,22 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
|||
hits = self.iwla.getCurrentVisits()
|
||||
|
||||
bandwidths = []
|
||||
bandwidths_group = {}
|
||||
for (k, super_hit) in hits.items():
|
||||
if not self.iwla.isRobot(super_hit):
|
||||
continue
|
||||
bandwidths.append((super_hit, super_hit['bandwidth'][0]))
|
||||
bandwidths.sort(key=lambda tup: tup[1], reverse=True)
|
||||
address = super_hit.get('robot_name', '') or super_hit['remote_addr']
|
||||
if address in bandwidths_group.keys():
|
||||
group = bandwidths_group[address]
|
||||
if group['last_access'] < super_hit['last_access']:
|
||||
group['last_access'] = super_hit['last_access']
|
||||
group['bandwidth'] += super_hit['bandwidth'][0]
|
||||
else:
|
||||
bandwidths_group[address] = {
|
||||
'last_access':super_hit['last_access'],
|
||||
'bandwidth':super_hit['bandwidth'][0]
|
||||
}
|
||||
|
||||
# All in a page
|
||||
if self.create_all_pages:
|
||||
|
@ -78,8 +89,8 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
|||
path = self.iwla.getCurDisplayPath(filename)
|
||||
|
||||
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
|
||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
||||
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
|
||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Name'), self.iwla._(u'Last seen')], [1])
|
||||
table.setColsCSSClass(['', 'iwla_bandwidth', '', ''])
|
||||
for (super_hit, bandwidth) in bandwidths:
|
||||
address = super_hit['remote_addr']
|
||||
if self.display_visitor_ip and\
|
||||
|
@ -89,6 +100,7 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
|||
row = [
|
||||
address,
|
||||
bandwidth,
|
||||
super_hit.get('robot_name', ''),
|
||||
time.asctime(super_hit['last_access'])
|
||||
]
|
||||
table.appendRow(row)
|
||||
|
@ -103,19 +115,16 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
|||
|
||||
# Top in index
|
||||
index = self.iwla.getDisplayIndex()
|
||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Robot'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
||||
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
|
||||
|
||||
for (super_hit, bandwidth) in bandwidths[:10]:
|
||||
address = super_hit['remote_addr']
|
||||
if self.display_visitor_ip and\
|
||||
super_hit.get('dns_name_replaced', False):
|
||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
||||
|
||||
_bandwidths_group = dict(sorted(bandwidths_group.items(), key=lambda g: g[1]['bandwidth'], reverse=True))
|
||||
for i, (k, group) in enumerate(_bandwidths_group.items()):
|
||||
if i >= 10: break
|
||||
row = [
|
||||
address,
|
||||
bandwidth,
|
||||
time.asctime(super_hit['last_access'])
|
||||
k,
|
||||
group['bandwidth'],
|
||||
time.asctime(group['last_access'])
|
||||
]
|
||||
table.appendRow(row)
|
||||
index.appendBlock(table)
|
||||
|
|
|
@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
|||
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
|
||||
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
|
||||
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
|
||||
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
return True
|
||||
|
@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin):
|
|||
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
|
||||
super_hit['robot'] = 1
|
||||
super_hit['keep_requests'] = False
|
||||
robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent'])
|
||||
if robot_name:
|
||||
super_hit['robot_name'] = robot_name[1]
|
||||
|
||||
# Basic rule to detect robots
|
||||
def hook(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user