import re

from iwla import IWLA
from iplugin import IPlugin


class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that reclassifies some page requests as hits.

    Every request already flagged as a page whose URI matches one of the
    regular expressions listed under the ``page_to_hit_conf`` configuration
    value is turned back into a plain hit, and the counters of the visit it
    belongs to are adjusted accordingly.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Read and compile the configured URI patterns.

        Returns:
            False when ``page_to_hit_conf`` is missing or empty, True once
            the patterns have been compiled into ``self.regexps``.
        """
        self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
        if not self.regexps:
            return False
        # Build a real list: it is scanned once per request in hook(), so a
        # one-shot iterator (what map() returns on Python 3) would be empty
        # after the first request. A comprehension also avoids the
        # Python-2-only ``lambda(r):`` tuple-parameter syntax.
        self.regexps = [re.compile(pattern) for pattern in self.regexps]
        return True

    def hook(self, iwla):
        """Demote matching page requests of the current visits to hits.

        A request is considered only if its visit is not flagged as a robot,
        the request is currently flagged as a page, its status is 200 and it
        happened on the same day of month as the visit's last access.
        """
        # NOTE: 'getCurrentVisists' is the spelling of the method called on
        # the iwla object; it is defined outside this file.
        hits = iwla.getCurrentVisists()
        for super_hit in hits.values():
            if super_hit['robot']:
                continue

            last_access_day = super_hit['last_access'].tm_mday
            for request in super_hit['requests']:
                if not request['is_page']:
                    continue
                if int(request['status']) != 200:
                    continue
                # NOTE(review): presumably the visit counters only cover the
                # day of the last access, hence this filter -- confirm.
                if request['time_decoded'].tm_mday != last_access_day:
                    continue

                uri = request['extract_request']['extract_uri']
                # re.match anchors at the start of the URI only.
                if any(regexp.match(uri) for regexp in self.regexps):
                    request['is_page'] = False
                    super_hit['viewed_pages'] -= 1
                    super_hit['viewed_hits'] += 1