import re

from iwla import IWLA
from iplugin import IPlugin


class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that reclassifies matching page requests as hits.

    Every non-robot request currently flagged as a page whose URI matches
    one of the regular expressions configured under ``page_to_hit_conf`` is
    demoted to a plain hit, and the per-visit counters are adjusted to match.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1
        # NOTE(review): 'viewed_http_codes' is declared as required but is
        # not read by this plugin's logic -- confirm whether it is needed.
        self.conf_requires = ['viewed_http_codes']

    def load(self):
        """Compile the patterns configured under ``page_to_hit_conf``.

        Returns:
            False when no pattern is configured (the value is missing or
            empty), True once every pattern has been compiled.

        Raises:
            re.error: if one of the configured patterns is not a valid
                regular expression.
        """
        self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
        if not self.regexps:
            return False
        # A real list (not a lazy ``map`` object) is required: hook() walks
        # the patterns once per request, and a Python 3 map iterator would be
        # exhausted after the first one.
        self.regexps = [re.compile(pattern) for pattern in self.regexps]
        return True

    def hook(self):
        """Demote page requests whose URI matches a configured pattern.

        Visits flagged as robots are skipped, as are requests that are not
        pages or that are not valid for the current analysis. For each
        remaining request, the URI is tested with ``match`` (anchored at the
        start of the string) against every pattern; on the first match the
        request's ``is_page`` flag is cleared, and the visit's
        ``viewed_pages`` is decremented while ``viewed_hits`` is incremented.
        """
        hits = self.iwla.getCurrentVisists()

        for super_hit in hits.values():
            if super_hit['robot']:
                continue

            for request in super_hit['requests']:
                if not request['is_page']:
                    continue
                if not self.iwla.isValidForCurrentAnalysis(request):
                    continue

                uri = request['extract_request']['extract_uri']
                # any() stops at the first matching pattern, so a request is
                # counted at most once.
                if any(regexp.match(uri) for regexp in self.regexps):
                    request['is_page'] = False
                    super_hit['viewed_pages'] -= 1
                    super_hit['viewed_hits'] += 1