Add a star for merged feeds parsers
This commit is contained in:
@@ -35,6 +35,7 @@ Plugin requirements :
|
||||
|
||||
Conf values needed :
|
||||
feeds
|
||||
merge_one_hit_only_feeds_parsers*
|
||||
|
||||
Output files :
|
||||
None
|
||||
@@ -51,12 +52,18 @@ Statistics deletion :
|
||||
"""
|
||||
|
||||
class IWLAPostAnalysisFeeds(IPlugin):
|
||||
NOT_A_FEED_PARSER = 0
|
||||
FEED_PARSER = 1
|
||||
MERGED_FEED_PARSER = 2
|
||||
|
||||
def __init__(self, iwla):
|
||||
super(IWLAPostAnalysisFeeds, self).__init__(iwla)
|
||||
self.API_VERSION = 1
|
||||
self.conf_requires = ['feeds']
|
||||
|
||||
def load(self):
|
||||
feeds = self.iwla.getConfValue('feeds', None)
|
||||
self.merge_one_hit_only_feeds_parsers = self.iwla.getConfValue('merge_one_hit_only_feeds_parsers', True)
|
||||
|
||||
if feeds is None: return False
|
||||
|
||||
@@ -70,9 +77,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||
if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
|
||||
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
||||
if one_hit_only.get(user_agent, None) is None:
|
||||
# Merged
|
||||
isFeedParser = self.MERGED_FEED_PARSER
|
||||
one_hit_only[user_agent] = (hit)
|
||||
else:
|
||||
isFeedParser = False
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
hit['feed_parser'] = isFeedParser
|
||||
|
||||
def hook(self):
|
||||
@@ -81,19 +90,23 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||
for hit in hits.values():
|
||||
isFeedParser = hit.get('feed_parser', None)
|
||||
|
||||
if isFeedParser == True:
|
||||
if isFeedParser == self.FEED_PARSER and\
|
||||
self.merge_one_hit_only_feeds_parsers:
|
||||
self.mergeOneHitOnlyFeedsParsers(one_hit_only, hit)
|
||||
|
||||
if not isFeedParser is None: continue
|
||||
|
||||
isFeedParser = False
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
|
||||
for regexp in self.feeds_re:
|
||||
if regexp.match(uri):
|
||||
isFeedParser = True
|
||||
isFeedParser = self.FEED_PARSER
|
||||
# Robot that views pages -> bot
|
||||
if hit['robot']:
|
||||
if hit['viewed_pages']:
|
||||
isFeedParser = False
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
if self.merge_one_hit_only_feeds_parsers:
|
||||
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
else:
|
||||
hit['feed_parser'] = isFeedParser
|
||||
|
||||
Reference in New Issue
Block a user