Replace merge_one_hit_only_feeds_parsers with merge_feeds_parsers and set it to False by default
This commit is contained in:
parent
c9bc21a506
commit
ad01b48898
|
@ -27,7 +27,7 @@ from iplugin import IPlugin
|
||||||
Post analysis hook
|
Post analysis hook
|
||||||
|
|
||||||
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
||||||
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
|
If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent
|
||||||
as it must be the same person with a different IP address.
|
as it must be the same person with a different IP address.
|
||||||
|
|
||||||
Plugin requirements :
|
Plugin requirements :
|
||||||
|
@ -36,7 +36,7 @@ Plugin requirements :
|
||||||
Conf values needed :
|
Conf values needed :
|
||||||
feeds
|
feeds
|
||||||
feeds_referers*
|
feeds_referers*
|
||||||
merge_one_hit_only_feeds_parsers*
|
merge_feeds_parsers*
|
||||||
|
|
||||||
Output files :
|
Output files :
|
||||||
None
|
None
|
||||||
|
@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
def load(self):
|
def load(self):
|
||||||
feeds = self.iwla.getConfValue('feeds', [])
|
feeds = self.iwla.getConfValue('feeds', [])
|
||||||
feeds_referers = self.iwla.getConfValue('feeds_referers', [])
|
feeds_referers = self.iwla.getConfValue('feeds_referers', [])
|
||||||
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)
|
self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False)
|
||||||
|
|
||||||
if feeds is None: return False
|
if feeds is None: return False
|
||||||
|
|
||||||
|
@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||||
if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
|
# One hit only match
|
||||||
|
if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
|
||||||
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
||||||
|
# First time, register into dict
|
||||||
if one_hit_only.get(user_agent, None) is None:
|
if one_hit_only.get(user_agent, None) is None:
|
||||||
# Merged
|
# Merged
|
||||||
isFeedParser = self.MERGED_FEED_PARSER
|
one_hit_only[user_agent] = hit
|
||||||
one_hit_only[user_agent] = (hit)
|
|
||||||
else:
|
else:
|
||||||
isFeedParser = self.NOT_A_FEED_PARSER
|
# Next time
|
||||||
|
# Current must be ignored
|
||||||
|
hit['feed_parser'] = self.NOT_A_FEED_PARSER
|
||||||
|
# Previous matched hit must be set as merged
|
||||||
|
isFeedParser = self.MERGED_FEED_PARSER
|
||||||
|
hit = one_hit_only[user_agent]
|
||||||
hit['feed_parser'] = isFeedParser
|
hit['feed_parser'] = isFeedParser
|
||||||
|
|
||||||
def hook(self):
|
def hook(self):
|
||||||
|
@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
for hit in hits.values():
|
for hit in hits.values():
|
||||||
isFeedParser = hit.get('feed_parser', None)
|
isFeedParser = hit.get('feed_parser', None)
|
||||||
|
|
||||||
if isFeedParser == self.FEED_PARSER and\
|
# Register already tagged feed parser in one_hit_only
|
||||||
self.merge_one_hit_only_feeds_parsers:
|
if self.merge_feeds_parsers and\
|
||||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
not isFeedParser in (None, self.BAD_FEED_PARSER):
|
||||||
|
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||||
|
continue
|
||||||
|
|
||||||
if isFeedParser:
|
if isFeedParser:
|
||||||
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
|
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
|
||||||
|
@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
for r in self.bad_feeds_re:
|
for r in self.bad_feeds_re:
|
||||||
if r.match(addr):
|
if r.match(addr):
|
||||||
hit['feed_parser'] = self.BAD_FEED_PARSER
|
hit['feed_parser'] = self.BAD_FEED_PARSER
|
||||||
return
|
break
|
||||||
return
|
continue
|
||||||
|
|
||||||
request = hit['requests'][0]
|
request = hit['requests'][0]
|
||||||
isFeedParser = self.NOT_A_FEED_PARSER
|
isFeedParser = self.NOT_A_FEED_PARSER
|
||||||
|
@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
isFeedParser = self.FEED_PARSER
|
isFeedParser = self.FEED_PARSER
|
||||||
break
|
break
|
||||||
|
|
||||||
if self.merge_one_hit_only_feeds_parsers:
|
if self.merge_feeds_parsers:
|
||||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||||
else:
|
else:
|
||||||
hit['feed_parser'] = isFeedParser
|
hit['feed_parser'] = isFeedParser
|
||||||
|
|
Loading…
Reference in New Issue
Block a user