Merge one hit only parsers in feeds parsers detection
This commit is contained in:
parent
efb5ddf761
commit
cfbd35d818
|
@ -67,7 +67,7 @@ class IWLADisplayFeeds(IPlugin):
|
||||||
|
|
||||||
# All in a page
|
# All in a page
|
||||||
if self.create_all_feeds_page:
|
if self.create_all_feeds_page:
|
||||||
title = createCurTitle(self.iwla, u'All Feeds parsers')
|
title = createCurTitle(self.iwla, self.iwla._(u'All Feeds parsers'))
|
||||||
filename = 'all_feeds.html'
|
filename = 'all_feeds.html'
|
||||||
path = self.iwla.getCurDisplayPath(filename)
|
path = self.iwla.getCurDisplayPath(filename)
|
||||||
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
|
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
|
||||||
|
@ -81,6 +81,9 @@ class IWLADisplayFeeds(IPlugin):
|
||||||
if display_visitor_ip and\
|
if display_visitor_ip and\
|
||||||
super_hit.get('dns_name_replaced', False):
|
super_hit.get('dns_name_replaced', False):
|
||||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
||||||
|
if super_hit['robot']:
|
||||||
|
table.appendRow([address, super_hit['not_viewed_pages'], super_hit['not_viewed_hits']])
|
||||||
|
else:
|
||||||
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
|
table.appendRow([address, super_hit['viewed_pages'], super_hit['viewed_hits']])
|
||||||
page.appendBlock(table)
|
page.appendBlock(table)
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,8 @@ from iplugin import IPlugin
|
||||||
Post analysis hook
|
Post analysis hook
|
||||||
|
|
||||||
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
Find feeds parsers (first hit in feeds conf value and no viewed pages if it's a robot)
|
||||||
|
If there is only one hit per day to a feed, merge feeds parsers with the same user agent
|
||||||
|
as it must be the same person with a different IP address.
|
||||||
|
|
||||||
Plugin requirements :
|
Plugin requirements :
|
||||||
None
|
None
|
||||||
|
@ -64,18 +66,34 @@ class IWLAPostAnalysisFeeds(IPlugin):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def mergeOneHitOnlyFeedsParsers(self, isFeedParser, one_hit_only, hit):
|
||||||
|
if isFeedParser and (hit['viewed_hits'] + hit['not_viewed_hits']) == 1:
|
||||||
|
user_agent = hit['requests'][0]['http_user_agent'].lower()
|
||||||
|
if one_hit_only.get(user_agent, None) is None:
|
||||||
|
one_hit_only[user_agent] = (hit)
|
||||||
|
else:
|
||||||
|
isFeedParser = False
|
||||||
|
hit['feed_parser'] = isFeedParser
|
||||||
|
|
||||||
def hook(self):
|
def hook(self):
|
||||||
hits = self.iwla.getCurrentVisists()
|
hits = self.iwla.getCurrentVisists()
|
||||||
|
one_hit_only = {}
|
||||||
for hit in hits.values():
|
for hit in hits.values():
|
||||||
if not hit.get('feed_parser', None) is None: continue
|
isFeedParser = hit.get('feed_parser', None)
|
||||||
|
|
||||||
|
if isFeedParser == True:
|
||||||
|
self.mergeOneHitOnlyFeedsParsers(one_hit_only, hit)
|
||||||
|
|
||||||
|
if not isFeedParser is None: continue
|
||||||
|
|
||||||
isFeedParser = False
|
isFeedParser = False
|
||||||
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
|
uri = hit['requests'][0]['extract_request']['extract_uri'].lower()
|
||||||
for regexp in self.feeds_re:
|
for regexp in self.feeds_re:
|
||||||
if regexp.match(uri):
|
if regexp.match(uri):
|
||||||
|
isFeedParser = True
|
||||||
# Robot that views pages -> bot
|
# Robot that views pages -> bot
|
||||||
if hit['robot']:
|
if hit['robot']:
|
||||||
if hit['viewed_pages']: continue
|
if hit['viewed_pages']:
|
||||||
isFeedParser = True
|
isFeedParser = False
|
||||||
break
|
break
|
||||||
hit['feed_parser'] = isFeedParser
|
self.mergeOneHitOnlyFeedsParsers(isFeedParser, one_hit_only, hit)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user