diff --git a/plugins/display/robot_bandwidth.py b/plugins/display/robot_bandwidth.py index d350246..65ea358 100644 --- a/plugins/display/robot_bandwidth.py +++ b/plugins/display/robot_bandwidth.py @@ -65,11 +65,22 @@ class IWLADisplayRobotBandwidth(IPlugin): hits = self.iwla.getCurrentVisits() bandwidths = [] + bandwidths_group = {} for (k, super_hit) in hits.items(): if not self.iwla.isRobot(super_hit): continue bandwidths.append((super_hit, super_hit['bandwidth'][0])) - bandwidths.sort(key=lambda tup: tup[1], reverse=True) + address = super_hit.get('robot_name', '') or super_hit['remote_addr'] + if address in bandwidths_group.keys(): + group = bandwidths_group[address] + if group['last_access'] < super_hit['last_access']: + group['last_access'] = super_hit['last_access'] + group['bandwidth'] += super_hit['bandwidth'][0] + else: + bandwidths_group[address] = { + 'last_access':super_hit['last_access'], + 'bandwidth':super_hit['bandwidth'][0] + } # All in a page if self.create_all_pages: @@ -78,8 +89,8 @@ class IWLADisplayRobotBandwidth(IPlugin): path = self.iwla.getCurDisplayPath(filename) page = display.createPage(title, path, self.iwla.getConfValue('css_path', [])) - table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1]) - table.setColsCSSClass(['', 'iwla_bandwidth', '']) + table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Name'), self.iwla._(u'Last seen')], [1]) + table.setColsCSSClass(['', 'iwla_bandwidth', '', '']) for (super_hit, bandwidth) in bandwidths: address = super_hit['remote_addr'] if self.display_visitor_ip and\ @@ -89,6 +100,7 @@ class IWLADisplayRobotBandwidth(IPlugin): row = [ address, bandwidth, + super_hit.get('robot_name', ''), time.asctime(super_hit['last_access']) ] table.appendRow(row) @@ -103,19 +115,16 @@ class IWLADisplayRobotBandwidth(IPlugin): # Top in index index = self.iwla.getDisplayIndex() - table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Host'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1]) + table = display.createBlock(DisplayHTMLBlockTable, title, [self.iwla._(u'Robot'), self.iwla._(u'Bandwidth'), self.iwla._(u'Last seen')], [1]) table.setColsCSSClass(['', 'iwla_bandwidth', '']) - for (super_hit, bandwidth) in bandwidths[:10]: - address = super_hit['remote_addr'] - if self.display_visitor_ip and\ - super_hit.get('dns_name_replaced', False): - address = '%s [%s]' % (address, super_hit['remote_ip']) - + _bandwidths_group = dict(sorted(bandwidths_group.items(), key=lambda g: g[1]['bandwidth'], reverse=True)) + for i, (k, group) in enumerate(_bandwidths_group.items()): + if i >= 10: break row = [ - address, - bandwidth, - time.asctime(super_hit['last_access']) + k, + group['bandwidth'], + time.asctime(group['last_access']) ] table.appendRow(row) index.appendBlock(table) diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index d4a0d6c..e2d71dd 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin): self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots)) self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE) self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE) + self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*') self.logger = logging.getLogger(self.__class__.__name__) return True @@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin): self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno)) super_hit['robot'] = 1 super_hit['keep_requests'] = False + robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent']) + if robot_name: + super_hit['robot_name'] = robot_name[1] # Basic rule to detect robots def hook(self):