Replace merge_one_hit_only_feeds_parsers with merge_feeds_parsers and set it to False by default

This commit is contained in:
Gregory Soutade 2022-11-12 19:00:14 +01:00
parent c9bc21a506
commit ad01b48898

View File

@ -27,7 +27,7 @@ from iplugin import IPlugin
Post analysis hook Post analysis hook
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot) Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
If there is only one hit per day to a feed, merge feeds parsers with the same user agent If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent
as it must be the same person with a different IP address. as it must be the same person with a different IP address.
Plugin requirements : Plugin requirements :
@ -36,7 +36,7 @@ Plugin requirements :
Conf values needed : Conf values needed :
feeds feeds
feeds_referers* feeds_referers*
merge_one_hit_only_feeds_parsers* merge_feeds_parsers*
Output files : Output files :
None None
@ -66,7 +66,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
def load(self): def load(self):
feeds = self.iwla.getConfValue('feeds', []) feeds = self.iwla.getConfValue('feeds', [])
feeds_referers = self.iwla.getConfValue('feeds_referers', []) feeds_referers = self.iwla.getConfValue('feeds_referers', [])
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True) self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False)
if feeds is None: return False if feeds is None: return False
@ -88,15 +88,21 @@ class IWLAPostAnalysisFeeds(IPlugin):
return True return True
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit): def mergeFeedsParsers(self, isFeedParser, one_hit_only, hit):
if isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1: # One hit only match
if isFeedParser: #isFeedParser and (hit['viewed_hits'][0] + hit['not_viewed_hits'][0]) == 1:
user_agent = hit['requests'][0]['http_user_agent'].lower() user_agent = hit['requests'][0]['http_user_agent'].lower()
# First time, register into dict
if one_hit_only.get(user_agent, None) is None: if one_hit_only.get(user_agent, None) is None:
# Merged # Merged
isFeedParser = self.MERGED_FEED_PARSER one_hit_only[user_agent] = hit
one_hit_only[user_agent] = (hit)
else: else:
isFeedParser = self.NOT_A_FEED_PARSER # Next time
# Current must be ignored
hit['feed_parser'] = self.NOT_A_FEED_PARSER
# Previous matched hit must be set as merged
isFeedParser = self.MERGED_FEED_PARSER
hit = one_hit_only[user_agent]
hit['feed_parser'] = isFeedParser hit['feed_parser'] = isFeedParser
def hook(self): def hook(self):
@ -105,9 +111,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
for hit in hits.values(): for hit in hits.values():
isFeedParser = hit.get('feed_parser', None) isFeedParser = hit.get('feed_parser', None)
if isFeedParser == self.FEED_PARSER and\ # Register already tagged feed parser in one_hit_only
self.merge_one_hit_only_feeds_parsers: if self.merge_feeds_parsers and\
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) not isFeedParser in (None, self.BAD_FEED_PARSER):
self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
continue
if isFeedParser: if isFeedParser:
if hit['feed_parser'] == self.BAD_FEED_PARSER: continue if hit['feed_parser'] == self.BAD_FEED_PARSER: continue
@ -118,8 +126,8 @@ class IWLAPostAnalysisFeeds(IPlugin):
for r in self.bad_feeds_re: for r in self.bad_feeds_re:
if r.match(addr): if r.match(addr):
hit['feed_parser'] = self.BAD_FEED_PARSER hit['feed_parser'] = self.BAD_FEED_PARSER
return break
return continue
request = hit['requests'][0] request = hit['requests'][0]
isFeedParser = self.NOT_A_FEED_PARSER isFeedParser = self.NOT_A_FEED_PARSER
@ -148,7 +156,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
isFeedParser = self.FEED_PARSER isFeedParser = self.FEED_PARSER
break break
if self.merge_one_hit_only_feeds_parsers: if self.merge_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit) self.mergeFeedsParsers(isFeedParser, one_hit_only, hit)
else: else:
hit['feed_parser'] = isFeedParser hit['feed_parser'] = isFeedParser