diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py index 284dd22..c42fcdb 100644 --- a/plugins/post_analysis/feeds.py +++ b/plugins/post_analysis/feeds.py @@ -27,7 +27,7 @@ from iplugin import IPlugin Post analysis hook Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) -If there is ony one hit per day to a feed, merge feeds parsers with the same user agent +If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent as it must be the same person with a different IP address. Plugin requirements : @@ -36,7 +36,7 @@ Plugin requirements : Conf values needed : feeds feeds_referers* - merge_one_hit_only_feeds_parsers* + merge_feeds_parsers* Output files : None @@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin): def load(self): feeds = self.iwla.getConfValue('feeds', []) feeds_referers = self.iwla.getConfValue('feeds_referers', []) - self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True) + self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False) if feeds is None: return False @@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin): return True - def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): - if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: + def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit): + # One hit only match + if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: user_agent = hit['requests'][0]['http_user_agent'].lower() + # First time, register into dict if one_hit_only.get(user_agent, None) is None: # Merged - isFeedParser = self.MERGED_FEED_PARSER - one_hit_only[user_agent] = (hit) + one_hit_only[user_agent] = hit else: - isFeedParser = self.NOT_A_FEED_PARSER + # Next time + # Current must be ignored + hit['feed_parser'] = self.NOT_A_FEED_PARSER + # Previous matched hit must be set as merged + isFeedParser = self.MERGED_FEED_PARSER + hit = one_hit_only[user_agent] hit['feed_parser'] = isFeedParser def hook(self): @@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin): for hit in hits.values(): isFeedParser = hit.get('feed_parser', None) - if isFeedParser == self.FEED_PARSER and\ - self.merge_one_hit_only_feeds_parsers: - self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) + # Register already tagged feed parser in one_hit_only + if self.merge_feeds_parsers and\ + not isFeedParser in (None, self.BAD_FEED_PARSER): + self.mergeFeedsParsers(isFeedParser, one_hit_only, hit) + continue if isFeedParser: if hit['feed_parser'] == self.BAD_FEED_PARSER: continue @@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin): for r in self.bad_feeds_re: if r.match(addr): hit['feed_parser'] = self.BAD_FEED_PARSER - return - return + break + continue request = hit['requests'][0] isFeedParser = self.NOT_A_FEED_PARSER @@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin): isFeedParser = self.FEED_PARSER break - if self.merge_one_hit_only_feeds_parsers: - self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) + if self.merge_feeds_parsers: + self.mergeFeedsParsers(isFeedParser, one_hit_only, hit) else: hit['feed_parser'] = isFeedParser