import re

from iwla import IWLA
from iplugin import IPlugin

#
# Pre analysis hook
#
# Change page into hit and hit into page in statistics
#
# Plugin requirements :
#     None
#
# Conf values needed :
#     page_to_hit_conf*
#     hit_to_page_conf*
#
# Output files :
#     None
#
# Statistics creation :
#     None
#
# Statistics update :
#     visits :
#         remote_addr =>
#             viewed_pages
#             viewed_hits
#             requests =>
#                 is_page
#
# Statistics deletion :
#     None
#


class IWLAPreAnalysisPageToHit(IPlugin):
    """Reclassify requests between "page" and "hit" using configured regexps.

    Two lists of regular expressions are read from the configuration:

    * ``page_to_hit_conf``: a request currently flagged as a page whose URI
      matches one of these patterns is turned into a hit.
    * ``hit_to_page_conf``: a request currently flagged as a hit whose URI
      matches one of these patterns is turned into a page.

    The per-visit ``viewed_pages`` / ``viewed_hits`` counters are adjusted
    accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Read and compile both pattern lists from the configuration.

        Returns:
            True when at least one of the two lists contains a pattern,
            False when both are empty (the hook would have nothing to do).

        Raises:
            re.error: if a configured pattern is not a valid regular
                expression.
        """
        # Build real lists (not lazy ``map`` objects): the patterns are
        # iterated once per request in ``hook``, and a Python 3 ``map``
        # iterator would be exhausted after the first pass.
        # Page to hit
        self.ph_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('page_to_hit_conf', [])
        ]

        # Hit to page
        self.hp_regexps = [
            re.compile(pattern)
            for pattern in self.iwla.getConfValue('hit_to_page_conf', [])
        ]

        # Both attributes are always assigned before returning, and either
        # list alone is enough for the plugin to be useful.
        return bool(self.ph_regexps or self.hp_regexps)

    def hook(self):
        """Flip ``is_page`` on matching requests and fix the visit counters.

        Visits flagged as ``robot`` are skipped entirely, as are requests
        rejected by ``isValidForCurrentAnalysis`` or ``hasBeenViewed``.
        """
        # NOTE: "getCurrentVisists" is spelled this way in the call made by
        # the original code; it is kept verbatim.
        visits = self.iwla.getCurrentVisists()

        for visit in visits.values():
            if visit['robot']:
                continue

            for request in visit['requests']:
                if not (self.iwla.isValidForCurrentAnalysis(request)
                        and self.iwla.hasBeenViewed(request)):
                    continue

                uri = request['extract_request']['extract_uri']

                if request['is_page']:
                    # Page to hit
                    if any(regexp.match(uri) for regexp in self.ph_regexps):
                        request['is_page'] = False
                        visit['viewed_pages'] -= 1
                        visit['viewed_hits'] += 1
                else:
                    # Hit to page
                    if any(regexp.match(uri) for regexp in self.hp_regexps):
                        request['is_page'] = True
                        visit['viewed_pages'] += 1
                        visit['viewed_hits'] -= 1