Add a star for merged feeds parsers

This commit is contained in:
Gregory Soutade
2015-02-18 20:56:03 +01:00
parent cfbd35d818
commit f5b0c35bad
2 changed files with 24 additions and 9 deletions

View File

@@ -35,6 +35,7 @@ Plugin requirements :
Conf values needed :
feeds
merge_one_hit_only_feeds_parsers*
Output files :
None
@@ -51,12 +52,18 @@ Statistics deletion :
"""
class IWLAPostAnalysisFeeds(IPlugin):
NOT_A_FEED_PARSER = 0
FEED_PARSER = 1
MERGED_FEED_PARSER = 2
# Plugin constructor: forwards `iwla` to the IPlugin base-class initializer,
# then sets the plugin API version to 1 and lists 'feeds' as the configuration
# value this plugin requires.
def __init__(self, iwla):
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
self.API_VERSION = 1
self.conf_requires = ['feeds']
def load(self):
feeds = self.iwla.getConfValue('feeds', None)
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)
if feeds is None: return False
@@ -70,9 +77,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
user_agent = hit['requests'][0]['http_user_agent'].lower()
if one_hit_only.get(user_agent, None) is None:
# Merged
isFeedParser = self.MERGED_FEED_PARSER
one_hit_only[user_agent] = (hit)
else:
isFeedParser = False
isFeedParser = self.NOT_A_FEED_PARSER
hit['feed_parser'] = isFeedParser
def hook(self):
@@ -81,19 +90,23 @@ class IWLAPostAnalysisFeeds(IPlugin):
for hit in hits.values():
isFeedParser = hit.get('feed_parser', None)
if isFeedParser == True:
if isFeedParser == self.FEED_PARSER and\
self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(one_hit_only, hit)
if not isFeedParser is None: continue
isFeedParser = False
isFeedParser = self.NOT_A_FEED_PARSER
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
for regexp in self.feeds_re:
if regexp.match(uri):
isFeedParser = True
isFeedParser = self.FEED_PARSER
# Robot that views pages -> bot
if hit['robot']:
if hit['viewed_pages']:
isFeedParser = False
isFeedParser = self.NOT_A_FEED_PARSER
break
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
if self.merge_one_hit_only_feeds_parsers:
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
else:
hit['feed_parser'] = isFeedParser