diff --git a/plugins/post_analysis/feeds.py b/plugins/post_analysis/feeds.py index 76814df..ec256ea 100644 --- a/plugins/post_analysis/feeds.py +++ b/plugins/post_analysis/feeds.py @@ -75,6 +75,11 @@ class IWLAPostAnalysisFeeds(IPlugin): self.bad_feeds_re = [] self.bad_feeds_re.append(re.compile(r'.*crawl.*')) + self.user_agents_re = [] + self.user_agents_re.append(re.compile(r'.*rss.*')) + self.user_agents_re.append(re.compile(r'.*atom.*')) + self.user_agents_re.append(re.compile(r'.*feed.*')) + return True def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): @@ -120,6 +125,14 @@ class IWLAPostAnalysisFeeds(IPlugin): if hit['not_viewed_pages'][0]: isFeedParser = self.NOT_A_FEED_PARSER break + + if isFeedParser == self.NOT_A_FEED_PARSER: + user_agent = hit['requests'][0]['http_user_agent'].lower() + for regexp in self.user_agents_re: + if regexp.match(user_agent): + isFeedParser = self.FEED_PARSER + break + if self.merge_one_hit_only_feeds_parsers: self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) else: