Update feed detector: check for 'feed', 'rss' or 'atom' string in user agent
This commit is contained in:
		| @@ -75,6 +75,11 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|         self.bad_feeds_re = [] | ||||
|         self.bad_feeds_re.append(re.compile(r'.*crawl.*')) | ||||
|  | ||||
|         self.user_agents_re = [] | ||||
|         self.user_agents_re.append(re.compile(r'.*rss.*')) | ||||
|         self.user_agents_re.append(re.compile(r'.*atom.*')) | ||||
|         self.user_agents_re.append(re.compile(r'.*feed.*')) | ||||
|  | ||||
|         return True | ||||
|          | ||||
|     def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): | ||||
| @@ -120,6 +125,14 @@ class IWLAPostAnalysisFeeds(IPlugin): | ||||
|                         if hit['not_viewed_pages'][0]: | ||||
|                             isFeedParser = self.NOT_A_FEED_PARSER | ||||
|                     break | ||||
|  | ||||
|             if isFeedParser == self.NOT_A_FEED_PARSER: | ||||
|                 user_agent = hit['requests'][0]['http_user_agent'].lower() | ||||
|                 for regexp in self.user_agents_re: | ||||
|                     if regexp.match(user_agent): | ||||
|                         isFeedParser = self.FEED_PARSER | ||||
|                         break | ||||
|                  | ||||
|             if self.merge_one_hit_only_feeds_parsers: | ||||
|                 self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) | ||||
|             else: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user