# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé 2015

# This file is part of iwla

# iwla is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# iwla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with iwla.  If not, see <http://www.gnu.org/licenses/>.
#

import re

from iwla import IWLA
from iplugin import IPlugin

"""
Pre analysis hook

Change pages into hits and hits into pages in statistics

Plugin requirements :
    None

Conf values needed :
    page_to_hit_conf*
    hit_to_page_conf*

Output files :
    None

Statistics creation :
    None

Statistics update :
visits :
    remote_addr =>
        is_page

Statistics deletion :
    None
"""


class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that reclassifies requests between pages and hits.

    Requests flagged as pages whose URI matches one of the
    ``page_to_hit_conf`` regular expressions become hits; requests flagged
    as hits whose URI matches one of the ``hit_to_page_conf`` regular
    expressions become pages. The per-visit ``viewed_pages`` and
    ``viewed_hits`` counters are adjusted accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the configured regular expressions.

        Both configuration values default to an empty list when absent.
        The compiled patterns are stored in real lists (not lazy ``map``
        objects) because ``hook`` scans them once per request.

        Returns True unconditionally.
        """
        # Page to hit
        page_to_hit = self.iwla.getConfValue('page_to_hit_conf', [])
        self.ph_regexps = [re.compile(r) for r in page_to_hit]

        # Hit to page
        hit_to_page = self.iwla.getConfValue('hit_to_page_conf', [])
        self.hp_regexps = [re.compile(r) for r in hit_to_page]

        return True

    def hook(self):
        """Reclassify the requests of every non-robot current visit.

        For each visit, requests are walked in reverse list order. The walk
        of a visit stops at the first request that is not valid for the
        current analysis, and requests that have not been viewed are
        skipped. Each remaining request has its ``is_page`` flag flipped
        when its URI matches the relevant set of regular expressions, and
        the visit's ``viewed_pages`` / ``viewed_hits`` counters are updated
        to stay consistent with the flag.
        """
        hits = self.iwla.getCurrentVisists()

        for super_hit in hits.values():
            if super_hit['robot']:
                continue

            # Reverse walk; presumably requests are stored oldest first so
            # the break below stops once older, already-analysed requests
            # are reached -- TODO confirm against the caller.
            for request in super_hit['requests'][::-1]:
                if not self.iwla.isValidForCurrentAnalysis(request):
                    break

                if not self.iwla.hasBeenViewed(request):
                    continue

                uri = request['extract_request']['extract_uri']

                if request['is_page']:
                    # Page to hit
                    if any(regexp.match(uri) for regexp in self.ph_regexps):
                        request['is_page'] = False
                        super_hit['viewed_pages'] -= 1
                        super_hit['viewed_hits'] += 1
                else:
                    # Hit to page
                    if any(regexp.match(uri) for regexp in self.hp_regexps):
                        request['is_page'] = True
                        super_hit['viewed_pages'] += 1
                        super_hit['viewed_hits'] -= 1