From 5ccc63c7ae33101cb4185b3217e5b0114f186d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Thu, 27 Nov 2014 13:07:14 +0100 Subject: [PATCH] Add hasBeenViewed() function --- iwla.py | 3 +++ plugins/post_analysis/top_downloads.py | 4 +++- plugins/pre_analysis/page_to_hit.py | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/iwla.py b/iwla.py index ba20aaa..2261e80 100755 --- a/iwla.py +++ b/iwla.py @@ -79,6 +79,9 @@ class IWLA(object): cur_time = self.meta_infos['start_analysis_time'] return (time.mktime(cur_time) <= time.mktime(request['time_decoded'])) + def hasBeenViewed(self, request): + return int(request['status']) in conf.viewed_http_codes + def _clearMeta(self): self.meta_infos = { 'last_time' : None diff --git a/plugins/post_analysis/top_downloads.py b/plugins/post_analysis/top_downloads.py index 3ea7a38..65f0b3f 100644 --- a/plugins/post_analysis/top_downloads.py +++ b/plugins/post_analysis/top_downloads.py @@ -21,9 +21,11 @@ class IWLAPostAnalysisTopDownloads(IPlugin): for (k, super_hit) in stats.items(): if super_hit['robot']: continue for r in super_hit['requests']: + if not self.iwla.isValidForCurrentAnalysis(r) or\ + not self.iwla.hasBeenViewed(r): + continue if r['is_page']: continue - if not self.iwla.isValidForCurrentAnalysis(r): continue if not int(r['status']) in viewed_http_codes: continue diff --git a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index 2ecbc9c..7f18c91 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -10,7 +10,6 @@ class IWLAPreAnalysisPageToHit(IPlugin): def __init__(self, iwla): super(IWLAPreAnalysisPageToHit, self).__init__(iwla) self.API_VERSION = 1 - self.conf_requires = ['viewed_http_codes'] def load(self): # Remove logo from indefero @@ -22,14 +21,15 @@ class IWLAPreAnalysisPageToHit(IPlugin): def hook(self): hits = self.iwla.getCurrentVisists() - viewed_http_codes = self.iwla.getConfValue('viewed_http_codes') for (k, super_hit) in hits.items(): if super_hit['robot']: continue for request in super_hit['requests']: + if not self.iwla.isValidForCurrentAnalysis(request) or\ + not self.iwla.hasBeenViewed(request): + continue if not request['is_page']: continue - if not self.iwla.isValidForCurrentAnalysis(request): continue uri = request['extract_request']['extract_uri'] for regexp in self.regexps: if regexp.match(uri):