iwla/plugins/post_analysis/top_pages.py

import time
import re

from iwla import IWLA
from iplugin import IPlugin

class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts page requests per URI.

    The counts are accumulated in the ``'top_pages'`` entry of the dict
    returned by ``iwla.getMonthStats()``. Keys are the request's
    ``server_name`` (empty string when absent) concatenated with its URI;
    values are hit counts.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the index-page pattern used by hook(); return True."""
        # NOTE(review): hook() uses match(), which only anchors at the start,
        # so any URI beginning with "/index" (e.g. "/index.html", but also
        # "/indexes/foo") is folded into "/". Confirm this is intended.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Add the current visits' page requests to the monthly 'top_pages'.

        Visits flagged as 'robot', requests that are not pages, and requests
        whose 'time_decoded' is earlier than the analysis start time are
        skipped. Existing counts in month_stats['top_pages'] are kept and
        incremented.
        """
        # presumably both values are time.struct_time (they are fed to
        # time.mktime) -- verify against the core if this ever changes.
        start_time = time.mktime(self.iwla.getStartAnalysisTime())

        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        top_pages = month_stats.get('top_pages', {})

        # Only the visit records are needed, not their keys.
        for super_hit in stats.values():
            if super_hit['robot']:
                continue

            for r in super_hit['requests']:
                if not r['is_page']:
                    continue

                if time.mktime(r['time_decoded']) < start_time:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    uri = '/'

                uri = "%s%s" % (r.get('server_name', ''), uri)

                # Single hash lookup; avoids materialising and scanning the
                # key list on every request (dict.keys() is a list on
                # Python 2).
                top_pages[uri] = top_pages.get(uri, 0) + 1

        # Store back explicitly: 'top_pages' may not have existed yet.
        month_stats['top_pages'] = top_pages
Add top_pages plugin 2014-11-26 22:03:19 +01:00			`import time`
			`import re`

			`from iwla import IWLA`
			`from iplugin import IPlugin`

			`class IWLAPostAnalysisTopPages(IPlugin):`
			`def __init__(self, iwla):`
			`super(IWLAPostAnalysisTopPages, self).__init__(iwla)`
			`self.API_VERSION = 1`

			`def load(self):`
			`self.index_re = re.compile(r'/index.*')`
			`return True`

			`def hook(self):`
			`start_time = self.iwla.getStartAnalysisTime()`
			`start_time = time.mktime(start_time)`

			`stats = self.iwla.getCurrentVisists()`
			`month_stats = self.iwla.getMonthStats()`

			`top_pages = month_stats.get('top_pages', {})`

			`for (k, super_hit) in stats.items():`
			`if super_hit['robot']: continue`
			`for r in super_hit['requests']:`
			`if not r['is_page']: continue`

			`if time.mktime(r['time_decoded']) < start_time: continue`

			`uri = r['extract_request']['extract_uri']`
			`if self.index_re.match(uri):`
			`uri = '/'`

			`uri = "%s%s" % (r.get('server_name', ''), uri)`

			`if not uri in top_pages.keys():`
			`top_pages[uri] = 1`
			`else:`
			`top_pages[uri] += 1`

			`month_stats['top_pages'] = top_pages`