# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla.  If not, see <http://www.gnu.org/licenses/>.
#

import re
import logging

from iwla import IWLA
from iplugin import IPlugin

"""
Pre analysis hook

Change page into hit and hit into page into statistics

Plugin requirements :
    None

Conf values needed :
    page_to_hit_conf*
    hit_to_page_conf*

Output files :
    None

Statistics creation :
    None

Statistics update :
visits :
    remote_addr =>
        is_page

Statistics deletion :
    None
"""


class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that reclassifies requests between page and hit.

    Requests flagged as pages whose URI matches one of the
    ``page_to_hit_conf`` regular expressions are turned into hits, and
    requests flagged as hits whose URI matches one of the
    ``hit_to_page_conf`` regular expressions are turned into pages.  The
    ``viewed_pages`` / ``viewed_hits`` counters of the visit are adjusted
    accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the configured patterns and create the plugin logger.

        Both configuration values default to an empty list when absent, in
        which case the corresponding conversion never triggers.

        Returns:
            Always ``True``.
        """
        # Page to hit
        self.ph_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('page_to_hit_conf', [])
        ]

        # Hit to page
        self.hp_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('hit_to_page_conf', [])
        ]

        self.logger = logging.getLogger(self.__class__.__name__)

        return True

    @staticmethod
    def _matches_any(regexps, uri):
        """Return True if at least one compiled pattern matches *uri*.

        ``match`` is used, so patterns are anchored at the start of the URI.
        """
        return any(regexp.match(uri) for regexp in regexps)

    @staticmethod
    def _move_view(super_hit, day, source, target):
        """Move one viewed request from counter *source* to counter *target*.

        *source* and *target* are the keys ``'viewed_pages'`` or
        ``'viewed_hits'`` of the visit.  Both the entry for *day* and the
        entry stored under key ``0`` are updated.
        """
        # NOTE(review): key 0 looks like the aggregate over all days --
        # confirm against the iwla core.
        super_hit[source][day] -= 1
        # The target may have no entry yet for that day.
        super_hit[target][day] = super_hit[target].get(day, 0) + 1
        super_hit[source][0] -= 1
        super_hit[target][0] += 1

    def hook(self):
        """Reclassify the requests of every non-robot current visit.

        For each visit, requests are walked in reverse list order and the
        walk stops at the first request that is not valid for the current
        analysis.  Requests that have not been viewed are skipped.
        """
        visits = self.iwla.getCurrentVisits()

        for super_hit in visits.values():
            if super_hit['robot']:
                continue

            for request in reversed(super_hit['requests']):
                if not self.iwla.isValidForCurrentAnalysis(request):
                    break

                if not self.iwla.hasBeenViewed(request):
                    continue

                uri = request['extract_request']['extract_uri']
                day = request['time_decoded'].tm_mday

                if request['is_page']:
                    # Page to hit
                    if self._matches_any(self.ph_regexps, uri):
                        self.logger.debug('%s changed from page to hit', uri)
                        request['is_page'] = False
                        self._move_view(super_hit, day,
                                        'viewed_pages', 'viewed_hits')
                else:
                    # Hit to page
                    if self._matches_any(self.hp_regexps, uri):
                        self.logger.debug('%s changed from hit to page', uri)
                        request['is_page'] = True
                        self._move_view(super_hit, day,
                                        'viewed_hits', 'viewed_pages')