Add no_merge_feeds_parsers_list conf value
This commit is contained in:
parent
9939922c31
commit
70de0d3aca
|
@ -59,7 +59,7 @@ class IWLADisplayFeeds(IPlugin):
|
|||
return True
|
||||
|
||||
def hook(self):
|
||||
from plugins.post_analysis.feeds import IWLAPostAnalysisFeeds
|
||||
from plugins.pre_analysis.feeds import IWLAPostAnalysisFeeds
|
||||
|
||||
display = self.iwla.getDisplay()
|
||||
hits = self.iwla.getCurrentVisits()
|
||||
|
|
|
@ -25,7 +25,7 @@ from iwla import IWLA
|
|||
from iplugin import IPlugin
|
||||
|
||||
"""
|
||||
Post analysis hook
|
||||
Pre analysis hook
|
||||
|
||||
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
||||
If merge_feeds_parsers is set to True, merge feeds parsers with the same user agent
|
||||
|
@ -77,6 +77,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
feeds_agents = self.iwla.getConfValue('feeds_agents', [])
|
||||
self.merge_feeds_parsers = self.iwla.getConfValue('merge_feeds_parsers', False)
|
||||
_merge_feeds_parsers_list = self.iwla.getConfValue('merge_feeds_parsers_list', [])
|
||||
_no_merge_feeds_parsers_list = self.iwla.getConfValue('no_merge_feeds_parsers_list', [])
|
||||
|
||||
if feeds is None: return False
|
||||
|
||||
|
@ -104,6 +105,10 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
for f in _merge_feeds_parsers_list:
|
||||
self.merge_feeds_parsers_list.append(re.compile(f))
|
||||
|
||||
self.no_merge_feeds_parsers_list = []
|
||||
for f in _no_merge_feeds_parsers_list:
|
||||
self.no_merge_feeds_parsers_list.append(re.compile(f))
|
||||
|
||||
self.merged_feeds = {}
|
||||
|
||||
return True
|
||||
|
@ -134,8 +139,11 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
|
||||
def mergeFeedsParsers(self, isFeedParser, hit):
|
||||
if isFeedParser in (self.FEED_PARSER, self.MERGED_FEED_PARSER):
|
||||
for r in self.no_merge_feeds_parsers_list:
|
||||
if r.match(hit['remote_addr']) or r.match(hit['remote_ip']) or r.match(hit['requests'][0]['http_user_agent']):
|
||||
return
|
||||
for r in self.merge_feeds_parsers_list:
|
||||
if r.match(hit['remote_addr']) or r.match(hit['remote_ip']):
|
||||
if r.match(hit['remote_addr']) or r.match(hit['remote_ip']) or r.match(hit['requests'][0]['http_user_agent']):
|
||||
# One group can view multiple different feeds
|
||||
key = r.pattern + hit.get('feed_domain', '') + hit.get('feed_uri', '')
|
||||
self._appendToMergeCache(isFeedParser, key, hit)
|
||||
|
@ -158,16 +166,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
# Update last access time
|
||||
if hit['last_access'] > hit.get('feed_parser_last_access', time.gmtime(0)):
|
||||
hit['feed_parser_last_access'] = hit['last_access']
|
||||
|
||||
if not hit.get('feed_name_analyzed', False) and\
|
||||
hit.get('dns_name_replaced', False):
|
||||
hit['feed_name_analyzed'] = True
|
||||
addr = hit.get('remote_addr', None)
|
||||
for r in self.bad_feeds_re:
|
||||
if r.match(addr):
|
||||
hit['feed_parser'] = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
|
||||
|
||||
# Register already tagged feed parser in merged_feeds
|
||||
if self.merge_feeds_parsers:
|
||||
self.mergeFeedsParsers(isFeedParser, hit)
|
||||
|
@ -177,12 +176,12 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
uri = request['extract_request']['extract_uri'].lower()
|
||||
for regexp in self.feeds_re:
|
||||
if regexp.match(uri):
|
||||
if regexp.match(uri) and self.iwla.hasBeenViewed(request):
|
||||
isFeedParser = self.FEED_PARSER
|
||||
# Robot that views pages -> bot
|
||||
if hit['robot']:
|
||||
if hit['not_viewed_pages'][0]:
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
# # Robot that views pages -> bot
|
||||
# if hit['robot']:
|
||||
# if hit['not_viewed_pages'][0]:
|
||||
# isFeedParser = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
|
||||
user_agent = request['http_user_agent'].lower()
|
||||
|
@ -199,14 +198,17 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
|
||||
if not hit.get('feed_name_analyzed', False) and\
|
||||
hit.get('dns_name_replaced', False):
|
||||
hit['feed_name_analyzed'] = True
|
||||
addr = hit.get('remote_addr', None)
|
||||
for r in self.bad_feeds_re:
|
||||
if r.match(addr):
|
||||
isFeedParser = hit['feed_parser'] = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
if isFeedParser == self.FEED_PARSER:
|
||||
if not hit.get('dns_name_replaced', False):
|
||||
self.iwla.reverseDNS(hit)
|
||||
|
||||
if not hit.get('feed_name_analyzed', False):
|
||||
hit['feed_name_analyzed'] = True
|
||||
addr = hit.get('remote_addr', None)
|
||||
for r in self.bad_feeds_re:
|
||||
if r.match(addr):
|
||||
isFeedParser = self.NOT_A_FEED_PARSER
|
||||
break
|
||||
|
||||
if isFeedParser == self.FEED_PARSER:
|
||||
hit['feed_domain'] = request['server_name']
|
||||
|
@ -217,6 +219,7 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
if subscribers:
|
||||
hit['feed_subscribers'] = int(subscribers.groups()[0])
|
||||
|
||||
hit['robot'] = True
|
||||
hit['feed_parser'] = isFeedParser
|
||||
if self.merge_feeds_parsers:
|
||||
self.mergeFeedsParsers(isFeedParser, hit)
|
||||
|
|
Loading…
Reference in New Issue
Block a user