diff --git a/iwla.py b/iwla.py index 1a3cf0f..9201baa 100755 --- a/iwla.py +++ b/iwla.py @@ -72,6 +72,9 @@ class IWLA(object): def getCurTime(self): return self.meta_infos['last_time'] + def getStartAnalysisTime(self): + return self.meta_infos['start_analysis_time'] + def _clearMeta(self): self.meta_infos = { 'last_time' : None @@ -200,6 +203,7 @@ class IWLA(object): def _decodeTime(self, hit): hit['time_decoded'] = time.strptime(hit['time_local'], conf.time_format) + return hit['time_decoded'] def getDisplayIndex(self): cur_time = self.meta_infos['last_time'] @@ -337,9 +341,7 @@ class IWLA(object): self.current_analysis['days_stats'][cur_time.tm_mday] = stats def _newHit(self, hit): - self._decodeTime(hit) - - t = hit['time_decoded'] + t = self._decodeTime(hit) cur_time = self.meta_infos['last_time'] @@ -360,6 +362,9 @@ class IWLA(object): self.meta_infos['last_time'] = t + if not self.meta_infos['start_analysis_time']: + self.meta_infos['start_analysis_time'] = t + if not self._decodeHTTPRequest(hit): return False for k in hit.keys(): @@ -370,8 +375,6 @@ class IWLA(object): return True def start(self): - self.cache_plugins = preloadPlugins(self.plugins, self) - print '==> Analyse previous database' self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta() @@ -380,6 +383,10 @@ class IWLA(object): else: self._clearVisits() + self.meta_infos['start_analysis_time'] = None + + self.cache_plugins = preloadPlugins(self.plugins, self) + print '==> Analysing log' with open(conf.analyzed_filename) as f: @@ -398,6 +405,7 @@ class IWLA(object): if self.analyse_started: self._generateDayStats() self._generateMonthStats() + del self.meta_infos['start_analysis_time'] self._serialize(self.meta_infos, conf.META_PATH) else: print '==> Analyse not started : nothing to do' diff --git a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index 8c046b6..d704b36 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -1,4 +1,5 @@ import re +import time from iwla import IWLA from iplugin import IPlugin @@ -20,15 +21,18 @@ class IWLAPreAnalysisPageToHit(IPlugin): return True def hook(self, iwla): - hits = iwla.getCurrentVisists() + start_time = self.iwla.getStartAnalysisTime() + start_time = time.mktime(start_time) + hits = iwla.getCurrentVisists() + viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304]) for (k, super_hit) in hits.items(): if super_hit['robot']: continue for p in super_hit['requests']: if not p['is_page']: continue - if int(p['status']) != 200: continue - if p['time_decoded'].tm_mday != super_hit['last_access'].tm_mday: continue + if int(p['status']) not in viewed_http_codes: continue + if time.mktime(p['time_decoded']) < start_time: continue uri = p['extract_request']['extract_uri'] for r in self.regexps: if r.match(uri):