Merge one hit only parsers in feeds parsers detection
This commit is contained in:
		| @@ -67,7 +67,7 @@ class IWLADisplayFeeds(IPlugin): | ||||
|  | ||||
|         # All in a page | ||||
|         if self.create_all_feeds_page: | ||||
|             title = createCurTitle(self.iwla, u'All Feeds parsers') | ||||
|             title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers')) | ||||
|             filename = 'all_feeds.html' | ||||
|             path = self.iwla.getCurDisplayPath(filename) | ||||
|             display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False) | ||||
| @@ -81,7 +81,10 @@ class IWLADisplayFeeds(IPlugin): | ||||
|                 if display_visitor_ip and\ | ||||
|                         super_hit.get('dns_name_replaced', False): | ||||
|                     address = '%s [%s]' % (address, super_hit['remote_ip']) | ||||
|                 table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']]) | ||||
|                 if super_hit['robot']: | ||||
|                     table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']]) | ||||
|                 else: | ||||
|                     table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']]) | ||||
|             page.appendBlock(table) | ||||
|  | ||||
|             display.addPage(page) | ||||
|   | ||||
| @@ -27,6 +27,8 @@ from iplugin import IPlugin | ||||
| Post analysis hook | ||||
|  | ||||
| Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) | ||||
| If there is only one hit per day to a feed, merge feeds parsers with the same user agent | ||||
| as it must be the same person with a different IP address. | ||||
|  | ||||
| Plugin requirements : | ||||
|     None | ||||
| @@ -64,18 +66,34 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|  | ||||
|         return True | ||||
|          | ||||
|     def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): | ||||
|         if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1: | ||||
|             user_agent = hit['requests'][0]['http_user_agent'].lower() | ||||
|             if one_hit_only.get(user_agent, None) is None: | ||||
|                 one_hit_only[user_agent] = (hit) | ||||
|             else: | ||||
|                 isFeedParser = False | ||||
|         hit['feed_parser'] = isFeedParser | ||||
|  | ||||
|     def hook(self): | ||||
|         hits = self.iwla.getCurrentVisists() | ||||
|         one_hit_only = {} | ||||
|         for hit in hits.values(): | ||||
|             if not hit.get('feed_parser', None) is None: continue | ||||
|             isFeedParser = hit.get('feed_parser', None) | ||||
|  | ||||
|             if isFeedParser == True: | ||||
|                 self.mergeOneHitOnlyFeedsParsers(one_hit_only, hit) | ||||
|  | ||||
|             if not isFeedParser is None: continue | ||||
|  | ||||
|             isFeedParser = False | ||||
|             uri = hit['requests'][0]['extract_request']['extract_uri'].lower() | ||||
|             for regexp in self.feeds_re: | ||||
|                 if regexp.match(uri): | ||||
|                     isFeedParser = True | ||||
|                     # Robot that views pages -> bot | ||||
|                     if hit['robot']: | ||||
|                         if hit['viewed_pages']: continue | ||||
|                     isFeedParser = True | ||||
|                         if hit['viewed_pages']: | ||||
|                             isFeedParser = False | ||||
|                     break | ||||
|             hit['feed_parser'] = isFeedParser | ||||
|  | ||||
|             self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user