# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla.  If not, see <http://www.gnu.org/licenses/>.
#

import re

from iwla import IWLA
from iplugin import IPlugin

#
# Post analysis hook
#
# Count TOP pages
#
# Plugin requirements :
#     None
#
# Conf values needed :
#     None
#
# Output files :
#     None
#
# Statistics creation :
#     None
#
# Statistics update :
# month_stats:
#     top_pages =>
#         uri
#
# Statistics deletion :
#     None
#


class IWLAPostAnalysisTopPages(IPlugin):
    """Post analysis plugin that counts page views per URI.

    The counters are kept in the ``top_pages`` dictionary of the month
    statistics, keyed by ``server_name`` + URI.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the pattern used to recognise index pages.

        Returns True unconditionally.
        """
        # match() anchors at the start of the string, so every URI that
        # begins with "/index" is recognised by this pattern.
        self.index_re = re.compile(r'/index.*')

        return True

    def hook(self):
        """Add the page hits of the current visits to ``top_pages``.

        Robot visits are ignored.  For each remaining visit the requests
        are walked from the last one to the first one, and the walk stops
        at the first request that is not valid for the current analysis.
        Only requests that have been viewed and that are pages are counted.
        """
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        top_pages = month_stats.get('top_pages', {})

        # Only the visit records are needed, not their keys.
        for super_hit in stats.values():
            if super_hit['robot']:
                continue

            # reversed() walks the list backwards without copying it.
            # NOTE(review): presumably requests are stored oldest first, so
            # the break below skips what was already analysed -- confirm.
            for r in reversed(super_hit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break

                if not self.iwla.hasBeenViewed(r) or not r['is_page']:
                    continue

                uri = r['extract_request']['extract_uri']
                # URIs starting with "/index" are all counted as "/".
                if self.index_re.match(uri):
                    uri = '/'

                # Prefix with the server name (when present) so that the
                # same path on different servers is counted separately.
                uri = "%s%s" % (r.get('server_name', ''), uri)

                # Single dictionary lookup instead of scanning keys().
                top_pages[uri] = top_pages.get(uri, 0) + 1

        # Needed when 'top_pages' did not exist yet in month_stats.
        month_stats['top_pages'] = top_pages