Find the robot name in the user agent's 'compatible' string and group robots by name

This commit is contained in:
Gregory Soutade 2023-01-28 09:38:59 +01:00
parent 9c57ad3ece
commit ac246eabe2
2 changed files with 26 additions and 13 deletions

View File

@ -65,11 +65,22 @@ class IWLADisplayRobotBandwidth(IPlugin):
hits = self.iwla.getCurrentVisits()
bandwidths = []
bandwidths_group = {}
for (k, super_hit) in hits.items():
if not self.iwla.isRobot(super_hit):
continue
bandwidths.append((super_hit, super_hit['bandwidth'][0]))
bandwidths.sort(key=lambda tup: tup[1], reverse=True)
address = super_hit.get('robot_name', '') or super_hit['remote_addr']
if address in bandwidths_group.keys():
group = bandwidths_group[address]
if group['last_access'] < super_hit['last_access']:
group['last_access'] = super_hit['last_access']
group['bandwidth'] += super_hit['bandwidth'][0]
else:
bandwidths_group[address] = {
'last_access':super_hit['last_access'],
'bandwidth':super_hit['bandwidth'][0]
}
# All in a page
if self.create_all_pages:
@ -78,8 +89,8 @@ class IWLADisplayRobotBandwidth(IPlugin):
path = self.iwla.getCurDisplayPath(filename)
page = display.createPage(title, path, self.iwla.getConfValue('css_path', []))
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Name'), self.iwla._(u'Last seen')], [1])
table.setColsCSSClass(['', 'iwla_bandwidth', '', ''])
for (super_hit, bandwidth) in bandwidths:
address = super_hit['remote_addr']
if self.display_visitor_ip and\
@ -89,6 +100,7 @@ class IWLADisplayRobotBandwidth(IPlugin):
row = [
address,
bandwidth,
super_hit.get('robot_name', ''),
time.asctime(super_hit['last_access'])
]
table.appendRow(row)
@ -103,19 +115,16 @@ class IWLADisplayRobotBandwidth(IPlugin):
# Top in index
index = self.iwla.getDisplayIndex()
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Robot'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1])
table.setColsCSSClass(['', 'iwla_bandwidth', ''])
for (super_hit, bandwidth) in bandwidths[:10]:
address = super_hit['remote_addr']
if self.display_visitor_ip and\
super_hit.get('dns_name_replaced', False):
address = '%s [%s]' % (address, super_hit['remote_ip'])
_bandwidths_group = dict(sorted(bandwidths_group.items(), key=lambda g: g[1]['bandwidth'], reverse=True))
for i, (k, group) in enumerate(_bandwidths_group.items()):
if i >= 10: break
row = [
address,
bandwidth,
time.asctime(super_hit['last_access'])
k,
group['bandwidth'],
time.asctime(group['last_access'])
]
table.appendRow(row)
index.appendBlock(table)

View File

@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin):
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
self.logger = logging.getLogger(self.__class__.__name__)
return True
@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin):
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
super_hit['robot'] = 1
super_hit['keep_requests'] = False
robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent'])
if robot_name:
super_hit['robot_name'] = robot_name[1]
# Basic rule to detect robots
def hook(self):