diff --git a/plugins/display/feeds.py b/plugins/display/feeds.py index ca5de4e..c211bfe 100644 --- a/plugins/display/feeds.py +++ b/plugins/display/feeds.py @@ -76,7 +76,9 @@ class IWLADisplayFeeds(IPlugin): table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')]) table.setColsCSSClass(['', 'iwla_page', 'iwla_hit']) for super_hit in hits.values(): - if not super_hit['feed_parser']: continue + if not super_hit.get('feed_parser', False): continue + if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER: + continue nb_feeds_parsers += 1 address = super_hit['remote_addr'] if display_visitor_ip and\ diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py index e90e9e2..d054aaf 100644 --- a/plugins/post_analysis/feeds.py +++ b/plugins/post_analysis/feeds.py @@ -55,6 +55,7 @@ class IWLAPostAnalysisFeeds(IPlugin): NOT_A_FEED_PARSER = 0 FEED_PARSER = 1 MERGED_FEED_PARSER = 2 + BAD_FEED_PARSER = 3 def __init__(self, iwla): super(IWLAPostAnalysisFeeds, self).__init__(iwla) @@ -71,6 +72,9 @@ class IWLAPostAnalysisFeeds(IPlugin): for f in feeds: self.feeds_re.append(re.compile(r'.*%s.*' % (f))) + self.bad_feeds_re = [] + self.bad_feeds_re.append(re.compile(r'.*crawl.*')) + return True def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): @@ -94,7 +98,17 @@ class IWLAPostAnalysisFeeds(IPlugin): self.merge_one_hit_only_feeds_parsers: self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) - if not isFeedParser is None: continue + if isFeedParser: + if hit['feed_parser'] == self.BAD_FEED_PARSER: continue + if not hit.get('feed_name_analysed', False) and\ + hit.get('dns_name_replaced', False): + hit['feed_name_analysed'] = True + addr = hit.get('remote_addr', None) + for r in self.bad_feeds_re: + if r.match(addr): + hit['feed_parser'] = self.BAD_FEED_PARSER + return + return isFeedParser = self.NOT_A_FEED_PARSER uri = hit['requests'][0]['extract_request']['extract_uri'].lower() diff --git a/plugins/post_analysis/reverse_dns.py b/plugins/post_analysis/reverse_dns.py index 8b04784..37ae232 100644 --- a/plugins/post_analysis/reverse_dns.py +++ b/plugins/post_analysis/reverse_dns.py @@ -67,7 +67,7 @@ class IWLAPostAnalysisReverseDNS(IPlugin): hits = self.iwla.getCurrentVisits() for (k, hit) in hits.items(): if hit.get('dns_analysed', False): continue - if not hit['feed_parser'] and\ + if not hit.get('feed_parser', False) and\ not self.iwla.isValidVisitor(hit): continue try: