Replace merge_one_hit_only_feeds_parsers with merge_feeds_parsers and set it to False by default
This commit is contained in:
		| @@ -27,7 +27,7 @@ from iplugin import IPlugin | |||||||
| Post analysis hook | Post analysis hook | ||||||
|  |  | ||||||
| Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) | Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) | ||||||
| If there is only one hit per day to a feed, merge feeds parsers with the same user agent | If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent | ||||||
| as it must be the same person with a different IP address. | as it must be the same person with a different IP address. | ||||||
|  |  | ||||||
| Plugin requirements : | Plugin requirements : | ||||||
| @@ -36,7 +36,7 @@ Plugin requirements : | |||||||
| Conf values needed : | Conf values needed : | ||||||
|     feeds |     feeds | ||||||
|     feeds_referers* |     feeds_referers* | ||||||
|     merge_one_hit_only_feeds_parsers* |     merge_feeds_parsers* | ||||||
|  |  | ||||||
| Output files : | Output files : | ||||||
|     None |     None | ||||||
| @@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin): | |||||||
|     def load(self): |     def load(self): | ||||||
|         feeds = self.iwla.getConfValue('feeds', []) |         feeds = self.iwla.getConfValue('feeds', []) | ||||||
|         feeds_referers = self.iwla.getConfValue('feeds_referers', []) |         feeds_referers = self.iwla.getConfValue('feeds_referers', []) | ||||||
|         self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True) |         self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False) | ||||||
|  |  | ||||||
|         if feeds is None: return False |         if feeds is None: return False | ||||||
|  |  | ||||||
| @@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin): | |||||||
|  |  | ||||||
|         return True |         return True | ||||||
|          |          | ||||||
|     def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): |     def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit): | ||||||
|         if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: |         # One hit only match | ||||||
|  |         if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: | ||||||
|             user_agent = hit['requests'][0]['http_user_agent'].lower() |             user_agent = hit['requests'][0]['http_user_agent'].lower() | ||||||
|  |             # First time, register into dict | ||||||
|             if one_hit_only.get(user_agent, None) is None: |             if one_hit_only.get(user_agent, None) is None: | ||||||
|                 # Merged |                 # Merged | ||||||
|                 isFeedParser = self.MERGED_FEED_PARSER |                 one_hit_only[user_agent] = hit | ||||||
|                 one_hit_only[user_agent] = (hit) |  | ||||||
|             else: |             else: | ||||||
|                 isFeedParser = self.NOT_A_FEED_PARSER |                 # Next time | ||||||
|  |                 # Current must be ignored | ||||||
|  |                 hit['feed_parser'] = self.NOT_A_FEED_PARSER | ||||||
|  |                 # Previous matched hit must be set as merged | ||||||
|  |                 isFeedParser = self.MERGED_FEED_PARSER | ||||||
|  |                 hit = one_hit_only[user_agent] | ||||||
|         hit['feed_parser'] = isFeedParser |         hit['feed_parser'] = isFeedParser | ||||||
|  |  | ||||||
|     def hook(self): |     def hook(self): | ||||||
| @@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin): | |||||||
|         for hit in hits.values(): |         for hit in hits.values(): | ||||||
|             isFeedParser = hit.get('feed_parser', None) |             isFeedParser = hit.get('feed_parser', None) | ||||||
|  |  | ||||||
|             if isFeedParser == self.FEED_PARSER and\ |             # Register already tagged feed parser in one_hit_only | ||||||
|                     self.merge_one_hit_only_feeds_parsers: |             if self.merge_feeds_parsers and\ | ||||||
|                 self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) |                not isFeedParser in (None, self.BAD_FEED_PARSER): | ||||||
|  |                 self.mergeFeedsParsers(isFeedParser, one_hit_only, hit) | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|             if isFeedParser: |             if isFeedParser: | ||||||
|                 if hit['feed_parser'] == self.BAD_FEED_PARSER: continue |                 if hit['feed_parser'] == self.BAD_FEED_PARSER: continue | ||||||
| @@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin): | |||||||
|                     for r in self.bad_feeds_re: |                     for r in self.bad_feeds_re: | ||||||
|                         if r.match(addr): |                         if r.match(addr): | ||||||
|                             hit['feed_parser'] = self.BAD_FEED_PARSER |                             hit['feed_parser'] = self.BAD_FEED_PARSER | ||||||
|                             return |                             break | ||||||
|                 return |                 continue | ||||||
|  |  | ||||||
|             request = hit['requests'][0] |             request = hit['requests'][0] | ||||||
|             isFeedParser = self.NOT_A_FEED_PARSER |             isFeedParser = self.NOT_A_FEED_PARSER | ||||||
| @@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin): | |||||||
|                         isFeedParser = self.FEED_PARSER |                         isFeedParser = self.FEED_PARSER | ||||||
|                         break |                         break | ||||||
|  |  | ||||||
|             if self.merge_one_hit_only_feeds_parsers: |             if self.merge_feeds_parsers: | ||||||
|                 self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) |                 self.mergeFeedsParsers(isFeedParser, one_hit_only, hit) | ||||||
|             else: |             else: | ||||||
|                 hit['feed_parser'] = isFeedParser |                 hit['feed_parser'] = isFeedParser | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user