Find the robot name in the user agent's 'compatible' string and group robots by that name

This commit is contained in:
Gregory Soutade 2023-01-28 09:38:59 +01:00
parent 9c57ad3ece
commit ac246eabe2
2 changed files with 26 additions and 13 deletions

View File

@ -65,11 +65,22 @@ class IWLADisplayRobotBandwidth(IPlugin):
hits = self.iwla.getCurrentVisits() hits = self.iwla.getCurrentVisits()
bandwidths = [] bandwidths = []
bandwidths_group = {}
for (k, super_hit) in hits.items(): for (k, super_hit) in hits.items():
if not self.iwla.isRobot(super_hit): if not self.iwla.isRobot(super_hit):
continue continue
bandwidths.append((super_hit, super_hit['bandwidth'][0])) bandwidths.append((super_hit, super_hit['bandwidth'][0]))
bandwidths.sort(key=lambda tup: tup[1], reverse=True) address = super_hit.get('robot_name', '') or super_hit['remote_addr']
if address in bandwidths_group.keys():
group = bandwidths_group[address]
if group['last_access'] < super_hit['last_access']:
group['last_access'] = super_hit['last_access']
group['bandwidth'] += super_hit['bandwidth'][0]
else:
bandwidths_group[address] = {
'last_access':super_hit['last_access'],
'bandwidth':super_hit['bandwidth'][0]
}
# All in a page # All in a page
if self.create_all_pages: if self.create_all_pages:
@ -78,8 +89,8 @@ class IWLADisplayRobotBandwidth(IPlugin):
path = self.iwla.getCurDisplayPath(filename) path = self.iwla.getCurDisplayPath(filename)
page = display.createPage(title, path, self.iwla.getConfValue('css_path', [])) page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1]) table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Name'), self.iwla._(u'Last seen')], [1])
table.setColsCSSClass(['', 'iwla_bandwidth', '']) table.setColsCSSClass(['', 'iwla_bandwidth', '', ''])
for (super_hit, bandwidth) in bandwidths: for (super_hit, bandwidth) in bandwidths:
address = super_hit['remote_addr'] address = super_hit['remote_addr']
if self.display_visitor_ip and\ if self.display_visitor_ip and\
@ -89,6 +100,7 @@ class IWLADisplayRobotBandwidth(IPlugin):
row = [ row = [
address, address,
bandwidth, bandwidth,
super_hit.get('robot_name', ''),
time.asctime(super_hit['last_access']) time.asctime(super_hit['last_access'])
] ]
table.appendRow(row) table.appendRow(row)
@ -103,19 +115,16 @@ class IWLADisplayRobotBandwidth(IPlugin):
# Top in index # Top in index
index = self.iwla.getDisplayIndex() index = self.iwla.getDisplayIndex()
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1]) table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Robot'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
table.setColsCSSClass(['', 'iwla_bandwidth', '']) table.setColsCSSClass(['', 'iwla_bandwidth', ''])
for (super_hit, bandwidth) in bandwidths[:10]: _bandwidths_group = dict(sorted(bandwidths_group.items(), key=lambda g: g[1]['bandwidth'], reverse=True))
address = super_hit['remote_addr'] for i, (k, group) in enumerate(_bandwidths_group.items()):
if self.display_visitor_ip and\ if i >= 10: break
super_hit.get('dns_name_replaced', False):
address = '%s [%s]' % (address, super_hit['remote_ip'])
row = [ row = [
address, k,
bandwidth, group['bandwidth'],
time.asctime(super_hit['last_access']) time.asctime(group['last_access'])
] ]
table.appendRow(row) table.appendRow(row)
index.appendBlock(table) index.appendBlock(table)

View File

@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin):
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)) self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE) self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE) self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
return True return True
@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin):
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno)) self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
super_hit['robot'] = 1 super_hit['robot'] = 1
super_hit['keep_requests'] = False super_hit['keep_requests'] = False
robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent'])
if robot_name:
super_hit['robot_name'] = robot_name[1]
# Basic rule to detect robots # Basic rule to detect robots
def hook(self): def hook(self):