Add top_pages plugin
This commit is contained in:
parent
f8a48a7144
commit
5e965f4cc1
4
conf.py
4
conf.py
|
@ -16,9 +16,9 @@ DB_ROOT = './output/'
|
||||||
DISPLAY_ROOT = './output/'
|
DISPLAY_ROOT = './output/'
|
||||||
|
|
||||||
pre_analysis_hooks = ['page_to_hit', 'robots']
|
pre_analysis_hooks = ['page_to_hit', 'robots']
|
||||||
post_analysis_hooks = ['referers']
|
post_analysis_hooks = ['referers', 'top_pages']
|
||||||
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
||||||
display_hooks = ['top_visitors', 'all_visits', 'referers']
|
display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages']
|
||||||
|
|
||||||
reverse_dns_timeout = 0.2
|
reverse_dns_timeout = 0.2
|
||||||
page_to_hit_conf = [r'^.+/logo/$']
|
page_to_hit_conf = [r'^.+/logo/$']
|
||||||
|
|
5
iwla.py
5
iwla.py
|
@ -140,9 +140,8 @@ class IWLA(object):
|
||||||
request = hit['extract_request']
|
request = hit['extract_request']
|
||||||
|
|
||||||
if 'extract_uri' in request.keys():
|
if 'extract_uri' in request.keys():
|
||||||
uri = request['extract_uri']
|
uri = request['extract_uri'] = request['http_uri']
|
||||||
else:
|
uri = request['extract_uri']
|
||||||
uri = request['http_uri']
|
|
||||||
|
|
||||||
hit['is_page'] = self.isPage(uri)
|
hit['is_page'] = self.isPage(uri)
|
||||||
|
|
||||||
|
|
42
plugins/display/top_pages.py
Normal file
42
plugins/display/top_pages.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
from iwla import IWLA
|
||||||
|
from iplugin import IPlugin
|
||||||
|
from display import *
|
||||||
|
|
||||||
|
class IWLADisplayTopPages(IPlugin):
    """Display plugin that renders the monthly 'top_pages' statistics.

    Declares a dependency on the 'IWLAPostAnalysisTopPages' plugin, which
    fills the 'top_pages' entry of the month statistics (URI -> count).
    """

    def __init__(self, iwla):
        super(IWLADisplayTopPages, self).__init__(iwla)
        self.API_VERSION = 1
        self.requires = ['IWLAPostAnalysisTopPages']

    def hook(self):
        """Publish the top pages on the index and on a dedicated page.

        The index receives a table limited to the first ten entries plus a
        raw HTML link named 'All pages'; a separate page, stored under
        '<year>/top_pages_<month>.html', receives the complete table.
        """
        month_stats = self.iwla.getMonthStats()

        # Order the (uri, count) pairs by count, largest first.
        ranked = list(month_stats['top_pages'].items())
        ranked.sort(key=lambda entry: entry[1], reverse=True)

        index = self.iwla.getDisplayIndex()

        # Short version for the index: only the first ten rows.
        summary = DisplayHTMLBlockTable('Top Pages', ['URI', 'Entrance'])
        for row in ranked[:10]:
            summary.appendRow([row[0], row[1]])
        index.appendBlock(summary)

        # cur_time is used as a struct_time (strftime, tm_mon, tm_year).
        cur_time = self.iwla.getCurTime()
        filename = 'top_pages_%d.html' % (cur_time.tm_mon)
        page = DisplayHTMLPage(
            time.strftime('Top Pages - %B %Y', cur_time),
            '%d/%s' % (cur_time.tm_year, filename))

        # Full version for the dedicated page: every row.
        complete = DisplayHTMLBlockTable('Top Pages', ['URI', 'Entrance'])
        for row in ranked:
            complete.appendRow([row[0], row[1]])
        page.appendBlock(complete)

        self.iwla.getDisplay().addPage(page)

        # Link from the index to the dedicated page (bare file name).
        link = DisplayHTMLRawBlock()
        link.setRawHTML('<a href=\'%s\'>All pages</a>' % (filename))
        index.appendBlock(link)
|
|
@ -70,6 +70,7 @@ class IWLAPostAnalysisReferers(IPlugin):
|
||||||
start_time = time.mktime(start_time)
|
start_time = time.mktime(start_time)
|
||||||
stats = self.iwla.getCurrentVisists()
|
stats = self.iwla.getCurrentVisists()
|
||||||
month_stats = self.iwla.getMonthStats()
|
month_stats = self.iwla.getMonthStats()
|
||||||
|
|
||||||
referers = month_stats.get('referers', {})
|
referers = month_stats.get('referers', {})
|
||||||
robots_referers = month_stats.get('robots_referers', {})
|
robots_referers = month_stats.get('robots_referers', {})
|
||||||
search_engine_referers = month_stats.get('search_engine_referers', {})
|
search_engine_referers = month_stats.get('search_engine_referers', {})
|
||||||
|
|
43
plugins/post_analysis/top_pages.py
Normal file
43
plugins/post_analysis/top_pages.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
|
||||||
|
from iwla import IWLA
|
||||||
|
from iplugin import IPlugin
|
||||||
|
|
||||||
|
class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts page requests per URI.

    The counters are kept in the 'top_pages' entry of the month statistics,
    a dict mapping '<server_name><uri>' to the number of counted requests.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the index-page pattern used by hook(); returns True."""
        # Used with match(), so it applies to any URI starting with '/index'.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Add the current visits' page requests to the monthly counters.

        Visits flagged as 'robot', requests that are not pages, and requests
        older than the analysis start time are ignored.
        """
        start_time = time.mktime(self.iwla.getStartAnalysisTime())

        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # Continue from the counters already stored for this month, if any.
        top_pages = month_stats.get('top_pages', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue
            for r in super_hit['requests']:
                if not r['is_page']:
                    continue

                if time.mktime(r['time_decoded']) < start_time:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    # Index pages are counted under the root URI.
                    uri = '/'

                # Prefix with the server name when the request carries one.
                uri = "%s%s" % (r.get('server_name', ''), uri)

                # dict.get avoids the `uri in top_pages.keys()` test, which
                # scans a freshly built key list on Python 2.
                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages
|
Loading…
Reference in New Issue
Block a user