Find the robot name in the User-Agent 'compatible' string and group robots by that name
This commit is contained in:
parent
9c57ad3ece
commit
ac246eabe2
|
@ -65,11 +65,22 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
||||||
hits = self.iwla.getCurrentVisits()
|
hits = self.iwla.getCurrentVisits()
|
||||||
|
|
||||||
bandwidths = []
|
bandwidths = []
|
||||||
|
bandwidths_group = {}
|
||||||
for (k, super_hit) in hits.items():
|
for (k, super_hit) in hits.items():
|
||||||
if not self.iwla.isRobot(super_hit):
|
if not self.iwla.isRobot(super_hit):
|
||||||
continue
|
continue
|
||||||
bandwidths.append((super_hit, super_hit['bandwidth'][0]))
|
bandwidths.append((super_hit, super_hit['bandwidth'][0]))
|
||||||
bandwidths.sort(key=lambda tup: tup[1], reverse=True)
|
address = super_hit.get('robot_name', '') or super_hit['remote_addr']
|
||||||
|
if address in bandwidths_group.keys():
|
||||||
|
group = bandwidths_group[address]
|
||||||
|
if group['last_access'] < super_hit['last_access']:
|
||||||
|
group['last_access'] = super_hit['last_access']
|
||||||
|
group['bandwidth'] += super_hit['bandwidth'][0]
|
||||||
|
else:
|
||||||
|
bandwidths_group[address] = {
|
||||||
|
'last_access':super_hit['last_access'],
|
||||||
|
'bandwidth':super_hit['bandwidth'][0]
|
||||||
|
}
|
||||||
|
|
||||||
# All in a page
|
# All in a page
|
||||||
if self.create_all_pages:
|
if self.create_all_pages:
|
||||||
|
@ -78,8 +89,8 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
||||||
path = self.iwla.getCurDisplayPath(filename)
|
path = self.iwla.getCurDisplayPath(filename)
|
||||||
|
|
||||||
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
|
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
|
||||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Name'), self.iwla._(u'Last seen')], [1])
|
||||||
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
|
table.setColsCSSClass(['', 'iwla_bandwidth', '', ''])
|
||||||
for (super_hit, bandwidth) in bandwidths:
|
for (super_hit, bandwidth) in bandwidths:
|
||||||
address = super_hit['remote_addr']
|
address = super_hit['remote_addr']
|
||||||
if self.display_visitor_ip and\
|
if self.display_visitor_ip and\
|
||||||
|
@ -89,6 +100,7 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
||||||
row = [
|
row = [
|
||||||
address,
|
address,
|
||||||
bandwidth,
|
bandwidth,
|
||||||
|
super_hit.get('robot_name', ''),
|
||||||
time.asctime(super_hit['last_access'])
|
time.asctime(super_hit['last_access'])
|
||||||
]
|
]
|
||||||
table.appendRow(row)
|
table.appendRow(row)
|
||||||
|
@ -103,19 +115,16 @@ class IWLADisplayRobotBandwidth(IPlugin):
|
||||||
|
|
||||||
# Top in index
|
# Top in index
|
||||||
index = self.iwla.getDisplayIndex()
|
index = self.iwla.getDisplayIndex()
|
||||||
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Robot'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
|
||||||
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
|
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
|
||||||
|
|
||||||
for (super_hit, bandwidth) in bandwidths[:10]:
|
_bandwidths_group = dict(sorted(bandwidths_group.items(), key=lambda g: g[1]['bandwidth'], reverse=True))
|
||||||
address = super_hit['remote_addr']
|
for i, (k, group) in enumerate(_bandwidths_group.items()):
|
||||||
if self.display_visitor_ip and\
|
if i >= 10: break
|
||||||
super_hit.get('dns_name_replaced', False):
|
|
||||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
|
||||||
|
|
||||||
row = [
|
row = [
|
||||||
address,
|
k,
|
||||||
bandwidth,
|
group['bandwidth'],
|
||||||
time.asctime(super_hit['last_access'])
|
time.asctime(group['last_access'])
|
||||||
]
|
]
|
||||||
table.appendRow(row)
|
table.appendRow(row)
|
||||||
index.appendBlock(table)
|
index.appendBlock(table)
|
||||||
|
|
|
@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
|
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
|
||||||
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
|
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
|
||||||
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
|
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
|
||||||
|
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
|
||||||
self.logger = logging.getLogger(self.__class__.__name__)
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
|
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
|
||||||
super_hit['robot'] = 1
|
super_hit['robot'] = 1
|
||||||
super_hit['keep_requests'] = False
|
super_hit['keep_requests'] = False
|
||||||
|
robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent'])
|
||||||
|
if robot_name:
|
||||||
|
super_hit['robot_name'] = robot_name[1]
|
||||||
|
|
||||||
# Basic rule to detect robots
|
# Basic rule to detect robots
|
||||||
def hook(self):
|
def hook(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user