Rename merge_one_hit_only_feeds_parsers to merge_feeds_parsers and set it to False by default
This commit is contained in:
parent
c9bc21a506
commit
ad01b48898
|
@ -27,7 +27,7 @@ from iplugin import IPlugin
|
|||
Post analysis hook
|
||||
|
||||
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
||||
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
|
||||
If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent
|
||||
as it must be the same person with a different IP address.
|
||||
|
||||
Plugin requirements :
|
||||
|
@ -36,7 +36,7 @@ Plugin requirements :
|
|||
Conf values needed :
|
||||
feeds
|
||||
feeds_referers*
|
||||
merge_one_hit_only_feeds_parsers*
|
||||
merge_feeds_parsers*
|
||||
|
||||
Output files :
|
||||
None
|
||||
|
@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
def load(self):
|
||||
feeds = self.iwla.getConfValue('feeds', [])
|
||||
feeds_referers = self.iwla.getConfValue('feeds_referers', [])
|
||||
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)
|
||||
self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False)
|
||||
|
||||
if feeds is None: return False
|
||||
|
||||
|
@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
|
||||
return True
|
||||
|
||||
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||
if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
|
||||
def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||
# One hit only match
|
||||
if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
|
||||
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
||||
# First time, register into dict
|
||||
if one_hit_only.get(user_agent, None) is None:
|
||||
# Merged
|
||||
isFeedParser = self.MERGED_FEED_PARSER
|
||||
one_hit_only[user_agent] = (hit)
|
||||
one_hit_only[user_agent] = hit
|
||||
else:
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
# Next time
|
||||
# Current must be ignored
|
||||
hit['feed_parser'] = self.NOT_A_FEED_PARSER
|
||||
# Previous matched hit must be set as merged
|
||||
isFeedParser = self.MERGED_FEED_PARSER
|
||||
hit = one_hit_only[user_agent]
|
||||
hit['feed_parser'] = isFeedParser
|
||||
|
||||
def hook(self):
|
||||
|
@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
for hit in hits.values():
|
||||
isFeedParser = hit.get('feed_parser', None)
|
||||
|
||||
if isFeedParser == self.FEED_PARSER and\
|
||||
self.merge_one_hit_only_feeds_parsers:
|
||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
# Register already tagged feed parser in one_hit_only
|
||||
if self.merge_feeds_parsers and\
|
||||
not isFeedParser in (None, self.BAD_FEED_PARSER):
|
||||
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
continue
|
||||
|
||||
if isFeedParser:
|
||||
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
|
||||
|
@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
for r in self.bad_feeds_re:
|
||||
if r.match(addr):
|
||||
hit['feed_parser'] = self.BAD_FEED_PARSER
|
||||
return
|
||||
return
|
||||
break
|
||||
continue
|
||||
|
||||
request = hit['requests'][0]
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
|
@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
isFeedParser = self.FEED_PARSER
|
||||
break
|
||||
|
||||
if self.merge_one_hit_only_feeds_parsers:
|
||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
if self.merge_feeds_parsers:
|
||||
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
else:
|
||||
hit['feed_parser'] = isFeedParser
|
||||
|
|
Loading…
Reference in New Issue
Block a user