# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla.  If not, see <http://www.gnu.org/licenses/>.
#

import re

from iwla import IWLA
from iplugin import IPlugin

"""
Post analysis hook

Count TOP pages

Plugin requirements :
    None

Conf values needed :
    None

Output files :
    None

Statistics creation :
    None

Statistics update :
    month_stats:
        top_pages =>
            uri => count

Statistics deletion :
    None
"""


class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts page views per URI.

    Counts are accumulated in ``month_stats['top_pages']`` as a mapping
    ``uri => count``.
    """

    def __init__(self, iwla):
        """Initialise the plugin with its owning ``iwla`` instance."""
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the index-page pattern and report success.

        Returns:
            True, unconditionally.
        """
        # NOTE(review): used with ``match()``, this pattern accepts every URI
        # that *starts with* "/index" (e.g. "/index.html", but also
        # "/indexes/foo"); all of them are folded into "/". Confirm this is
        # the intended behaviour before tightening the pattern.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Update ``month_stats['top_pages']`` from the current visits.

        For every visit not flagged as ``robot``, the requests are walked
        from last to first. The walk of a visit stops at the first request
        that is not valid for the current analysis. Requests that have not
        been viewed, or that are not pages, are skipped. Each remaining
        request increments the counter of its URI, where the URI is the
        request's ``server_name`` (empty string if absent) followed by the
        extracted URI, with index pages normalised to ``/``.
        """
        stats = self.iwla.getCurrentVisits()
        month_stats = self.iwla.getMonthStats()

        # Reuse the counters already stored for this month, if any.
        top_pages = month_stats.get('top_pages', {})

        for super_hit in stats.values():
            if super_hit['robot']:
                continue

            # Walk from the most recently appended request backwards.
            # NOTE(review): the ``break`` presumably relies on requests being
            # stored in chronological order, so that everything before the
            # first invalid request was handled by a previous analysis --
            # confirm against the code that fills 'requests'.
            for r in reversed(super_hit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(r):
                    break

                if not self.iwla.hasBeenViewed(r) or not r['is_page']:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    uri = '/'

                # Prefix with the server name so that identical paths served
                # under different server names are counted separately.
                uri = "%s%s" % (r.get('server_name', ''), uri)

                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages