Merge one hit only parsers in feeds parsers detection
This commit is contained in:
parent
efb5ddf761
commit
cfbd35d818
|
@ -67,7 +67,7 @@ class IWLADisplayFeeds(IPlugin):
|
|||
|
||||
# All in a page
|
||||
if self.create_all_feeds_page:
|
||||
title = createCurTitle(self.iwla, u'All Feeds parsers')
|
||||
title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers'))
|
||||
filename = 'all_feeds.html'
|
||||
path = self.iwla.getCurDisplayPath(filename)
|
||||
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
|
||||
|
@ -81,6 +81,9 @@ class IWLADisplayFeeds(IPlugin):
|
|||
if display_visitor_ip and\
|
||||
super_hit.get('dns_name_replaced', False):
|
||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
||||
if super_hit['robot']:
|
||||
table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']])
|
||||
else:
|
||||
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
|
||||
page.appendBlock(table)
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ from iplugin import IPlugin
|
|||
Post analysis hook
|
||||
|
||||
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
||||
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
|
||||
as it must be the same person with a different IP address.
|
||||
|
||||
Plugin requirements :
|
||||
None
|
||||
|
@ -64,18 +66,34 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
|||
|
||||
return True
|
||||
|
||||
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
    """
    Store the final feed parser flag of a visit in hit['feed_parser'].

    A visit flagged as feed parser whose total hit count
    (viewed_hits + not_viewed_hits) is exactly 1 is keyed by the lower-cased
    user agent of its first request. The first such visit seen for a user
    agent is remembered in one_hit_only and keeps its flag; every later one
    with the same user agent gets feed_parser = False.

    isFeedParser -- detection result computed for this visit
    one_hit_only -- dict {lower-cased user agent: first single-hit visit},
                    updated in place
    hit          -- visit dict; its 'feed_parser' key is (over)written
    """
    if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
        user_agent = hit['requests'][0]['http_user_agent'].lower()
        if user_agent in one_hit_only:
            # Same user agent already recorded by another single-hit visit
            isFeedParser = False
        else:
            one_hit_only[user_agent] = hit
    hit['feed_parser'] = isFeedParser
|
||||
|
||||
def hook(self):
    """
    Flag every current visit as feed parser or not.

    Visits that already carry a 'feed_parser' value are not analysed again;
    those already flagged True are still passed to
    mergeOneHitOnlyFeedsParsers. For the other visits, the lower-cased URI
    of the first request is matched against self.feeds_re, and the result
    is handed to mergeOneHitOnlyFeedsParsers, which writes
    hit['feed_parser'].
    """
    hits = self.iwla.getCurrentVisists()
    # Lower-cased user agent -> first single-hit feed parser seen in this run
    one_hit_only = {}
    for hit in hits.values():
        isFeedParser = hit.get('feed_parser', None)

        if isFeedParser:
            # The flag must be passed too: the method takes
            # (isFeedParser, one_hit_only, hit)
            self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)

        # Already analysed (True or False): nothing more to do
        if isFeedParser is not None:
            continue

        isFeedParser = False
        uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
        for regexp in self.feeds_re:
            if regexp.match(uri):
                isFeedParser = True
                # Robot that views pages -> bot
                if hit['robot']:
                    if hit['viewed_pages']:
                        isFeedParser = False
                break

        self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||
|
|
Loading…
Reference in New Issue
Block a user