2014-11-26 22:03:19 +01:00
|
|
|
import re
|
|
|
|
|
|
|
|
from iwla import IWLA
|
|
|
|
from iplugin import IPlugin
|
|
|
|
|
2014-12-10 21:15:56 +01:00
|
|
|
#
|
|
|
|
# Post analysis hook
|
|
|
|
#
|
|
|
|
# Count TOP pages
|
|
|
|
#
|
|
|
|
# Plugin requirements :
|
|
|
|
# None
|
|
|
|
#
|
|
|
|
# Conf values needed :
|
|
|
|
# None
|
|
|
|
#
|
|
|
|
# Output files :
|
|
|
|
# None
|
|
|
|
#
|
|
|
|
# Statistics creation :
|
|
|
|
# None
|
|
|
|
#
|
|
|
|
# Statistics update :
|
|
|
|
# month_stats:
|
|
|
|
# top_pages =>
|
|
|
|
# uri
|
|
|
|
#
|
|
|
|
# Statistics deletion :
|
|
|
|
# None
|
|
|
|
#
|
|
|
|
|
2014-11-26 22:03:19 +01:00
|
|
|
class IWLAPostAnalysisTopPages(IPlugin):
    """Post analysis hook that counts how often each page was requested.

    Updates ``month_stats['top_pages']``, a dict mapping a URI (prefixed
    with the request's ``server_name`` when the request carries one) to the
    number of counted page requests for it.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the index-page pattern used by hook(); always returns True."""
        # The pattern is applied with re.match(), which anchors at the start
        # of the string: only URIs *beginning* with "/index" are affected.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Add the page requests of the current visits to the monthly counters.

        For every visit not flagged as a robot, requests are walked from the
        last one to the first one. The walk of a visit stops at the first
        request that is not valid for the current analysis; requests that
        have not been viewed or that are not pages are skipped. URIs that
        start with "/index" are counted as "/".

        The result is stored back in ``month_stats['top_pages']``.
        """
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # Reuse the counters already stored for the month, if any.
        top_pages = month_stats.get('top_pages', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue

            # NOTE(review): presumably requests are in chronological order,
            # so walking backwards and stopping at the first invalid request
            # skips what a previous run already handled -- confirm.
            for r in reversed(super_hit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break
                if not self.iwla.hasBeenViewed(r) or not r['is_page']:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    uri = '/'

                uri = "%s%s" % (r.get('server_name', ''), uri)

                # Single dict lookup instead of a membership test against
                # .keys() (a linear list scan on Python 2).
                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages
|