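Remove crawlers from feed parsers: a feed parser whose address matches "crawl" is flagged BAD_FEED_PARSER and skipped in the feeds display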
This commit is contained in:
		| @@ -76,7 +76,9 @@ class IWLADisplayFeeds(IPlugin): | ||||
|             table = display.createBlock(DisplayHTMLBlockTable, self.iwla._(u'All feeds parsers'), [self.iwla._(u'Host'), self.iwla._(u'Pages'), self.iwla._(u'Hits')]) | ||||
|             table.setColsCSSClass(['', 'iwla_page', 'iwla_hit']) | ||||
|             for super_hit in hits.values(): | ||||
|                 if not super_hit['feed_parser']: continue | ||||
|                 if not super_hit.get('feed_parser', False): continue | ||||
|                 if super_hit['feed_parser'] == IWLAPostAnalysisFeeds.BAD_FEED_PARSER: | ||||
|                     continue | ||||
|                 nb_feeds_parsers += 1 | ||||
|                 address = super_hit['remote_addr'] | ||||
|                 if display_visitor_ip and\ | ||||
|   | ||||
| @@ -55,6 +55,7 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|     NOT_A_FEED_PARSER = 0 | ||||
|     FEED_PARSER = 1 | ||||
|     MERGED_FEED_PARSER = 2 | ||||
|     BAD_FEED_PARSER = 3 | ||||
|  | ||||
|     def __init__(self, iwla): | ||||
|         super(IWLAPostAnalysisFeeds, self).__init__(iwla) | ||||
| @@ -71,6 +72,9 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|         for f in feeds: | ||||
|             self.feeds_re.append(re.compile(r'.*%s.*' % (f))) | ||||
|  | ||||
|         self.bad_feeds_re = [] | ||||
|         self.bad_feeds_re.append(re.compile(r'.*crawl.*')) | ||||
|  | ||||
|         return True | ||||
|          | ||||
|     def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): | ||||
| @@ -94,7 +98,17 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|                     self.merge_one_hit_only_feeds_parsers: | ||||
|                 self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) | ||||
|  | ||||
|             if not isFeedParser is None: continue | ||||
|             if isFeedParser: | ||||
|                 if hit['feed_parser'] == self.BAD_FEED_PARSER: continue | ||||
|                 if not hit.get('feed_name_analysed', False) and\ | ||||
|                    hit.get('dns_name_replaced', False): | ||||
|                     hit['feed_name_analysed'] = True | ||||
|                     addr = hit.get('remote_addr', None) | ||||
|                     for r in self.bad_feeds_re: | ||||
|                         if r.match(addr): | ||||
|                             hit['feed_parser'] = self.BAD_FEED_PARSER | ||||
|                             return | ||||
|                 return | ||||
|  | ||||
|             isFeedParser = self.NOT_A_FEED_PARSER | ||||
|             uri = hit['requests'][0]['extract_request']['extract_uri'].lower() | ||||
|   | ||||
| @@ -67,7 +67,7 @@ class IWLAPostAnalysisReverseDNS(IPlugin): | ||||
|         hits = self.iwla.getCurrentVisits() | ||||
|         for (k, hit) in hits.items(): | ||||
|             if hit.get('dns_analysed', False): continue | ||||
|             if not hit['feed_parser'] and\ | ||||
|             if not hit.get('feed_parser', False) and\ | ||||
|                not self.iwla.isValidVisitor(hit): | ||||
|                 continue | ||||
|             try: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user