From 9fbc5448bc8262bf41346cb8b2b276dcabe7546a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Thu, 27 Nov 2014 12:34:42 +0100 Subject: [PATCH] Add conf_requires. Load plugins in order --- conf.py | 8 +++--- default_conf.py | 5 +++- iplugin.py | 40 +++++++++++++++++++--------- iwla.py | 29 ++++++++++---------- plugins/post_analysis/referers.py | 3 ++- plugins/post_analysis/reverse_dns.py | 5 +++- plugins/pre_analysis/page_to_hit.py | 5 +++- 7 files changed, 60 insertions(+), 35 deletions(-) diff --git a/conf.py b/conf.py index 94b66aa..5f957d6 100644 --- a/conf.py +++ b/conf.py @@ -16,11 +16,11 @@ DB_ROOT = './output/' DISPLAY_ROOT = './output/' pre_analysis_hooks = ['page_to_hit', 'robots'] -post_analysis_hooks = ['referers', 'top_pages'] +post_analysis_hooks = ['referers', 'top_pages', 'top_downloads'] # post_analysis_hooks = ['top_visitors', 'reverse_dns'] -display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages'] +display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads'] reverse_dns_timeout = 0.2 -page_to_hit_conf = [r'^.+/logo/$'] +page_to_hit_conf = [r'^.+/logo[/]?$', r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$'] -count_hit_only_visitors = False +count_hit_only_visitors = True diff --git a/default_conf.py b/default_conf.py index 765afa8..48ce1df 100644 --- a/default_conf.py +++ b/default_conf.py @@ -20,7 +20,10 @@ pre_analysis_hooks = [] post_analysis_hooks = [] display_hooks = [] -pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php'] +pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php'] viewed_http_codes = [200, 304] count_hit_only_visitors = True + +multimedia_files = ['png', 'jpg', 'jpeg', 'gif', 'ico', + 'css', 'js'] diff --git a/iplugin.py b/iplugin.py index b1801af..cac9f83 100644 --- a/iplugin.py +++ b/iplugin.py @@ -7,6 +7,7 @@ class IPlugin(object): def __init__(self, iwla): self.iwla = iwla self.requires = [] + self.conf_requires = [] self.API_VERSION = 1 self.ANALYSIS_CLASS = 'HTTP' @@ -22,6 +23,9 @@ class IPlugin(object): def getRequirements(self): return self.requires + def getConfRequirements(self): + return self.conf_requires + def load(self): return True @@ -33,8 +37,8 @@ def preloadPlugins(plugins, iwla): print "==> Preload plugins" - for root in plugins.keys(): - for plugin_filename in plugins[root]: + for (root, plugins_filenames) in plugins: + for plugin_filename in plugins_filenames: plugin_path = root + '.' + plugin_filename try: mod = importlib.import_module(plugin_path) @@ -57,19 +61,29 @@ def preloadPlugins(plugins, iwla): #print 'Load plugin %s' % (plugin_name) + conf_requirements = plugin.getConfRequirements() + + requirement_validated = True + for r in conf_requirements: + conf_value = iwla.getConfValue(r, None) + if conf_value is None: + print '\'%s\' conf value required for %s' % (r, plugin_path) + requirement_validated = False + break + if not requirement_validated: continue + requirements = plugin.getRequirements() - if requirements: - requirement_validated = False - for r in requirements: - for (_,p) in cache_plugins.items(): - if p.__class__.__name__ == r: - requirement_validated = True - break - if not requirement_validated: - print 'Missing requirements for plugin %s' % (plugin_path) + requirement_validated = False + for r in requirements: + for (_,p) in cache_plugins.items(): + if p.__class__.__name__ == r: + requirement_validated = True break - if not requirement_validated: continue + if not requirement_validated: + print 'Missing requirements \'%s\' for plugin %s' % (r, plugin_path) + break + if requirements and not requirement_validated: continue if not plugin.load(): print 'Plugin %s load failed' % (plugin_path) @@ -78,7 +92,7 @@ def preloadPlugins(plugins, iwla): print '\tRegister %s' % (plugin_path) cache_plugins[plugin_path] = plugin except Exception as e: - print 'Error loading \'%s\' => %s' % (plugin_path, e) + print 'Error loading %s => %s' % (plugin_path, e) traceback.print_exc() return cache_plugins diff --git a/iwla.py b/iwla.py index 5fb4a78..ba20aaa 100755 --- a/iwla.py +++ b/iwla.py @@ -35,11 +35,11 @@ class IWLA(object): self.http_request_extracted = re.compile(r'(?P\S+) (?P\S+) (?P\S+)') self.log_re = re.compile(self.log_format_extracted) self.uri_re = re.compile(r'(?P[^\?]+)(\?(?P.+))?') - self.plugins = {conf.PRE_HOOK_DIRECTORY : conf.pre_analysis_hooks, - conf.POST_HOOK_DIRECTORY : conf.post_analysis_hooks, - conf.DISPLAY_HOOK_DIRECTORY : conf.display_hooks} + self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks), + (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks), + (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)] - def getConfValue(self, key, default): + def getConfValue(self, key, default=None): if not key in dir(conf): return default else: @@ -77,7 +77,7 @@ class IWLA(object): def isValidForCurrentAnalysis(self, request): cur_time = self.meta_infos['start_analysis_time'] - return (time.mktime(cur_time) < time.mktime(request['time_decoded'])) + return (time.mktime(cur_time) <= time.mktime(request['time_decoded'])) def _clearMeta(self): self.meta_infos = { @@ -115,12 +115,15 @@ class IWLA(object): return pickle.load(f) return None - def _callPlugins(self, root, *args): - print '==> Call plugins (%s)' % root - for p in self.plugins[root]: - print '\t%s' % (p) - mod = self.cache_plugins[root + '.' + p] - mod.hook(*args) + def _callPlugins(self, target_root, *args): + print '==> Call plugins (%s)' % target_root + for (root, plugins) in self.plugins: + if root != target_root: continue + for p in plugins: + mod = self.cache_plugins.get(root + '.' + p, None) + if mod: + print '\t%s' % (p) + mod.hook(*args) def isPage(self, request): for e in conf.pages_extensions: @@ -143,9 +146,7 @@ class IWLA(object): request = hit['extract_request'] - if 'extract_uri' in request.keys(): - uri = request['extract_uri'] = request['http_uri'] - uri = request['extract_uri'] + uri = request.get('extract_uri', request['http_uri']) hit['is_page'] = self.isPage(uri) diff --git a/plugins/post_analysis/referers.py b/plugins/post_analysis/referers.py index 6619ecd..f7dc714 100644 --- a/plugins/post_analysis/referers.py +++ b/plugins/post_analysis/referers.py @@ -10,6 +10,7 @@ class IWLAPostAnalysisReferers(IPlugin): def __init__(self, iwla): super(IWLAPostAnalysisReferers, self).__init__(iwla) self.API_VERSION = 1 + self.conf_requires = ['domain_name'] def _getSearchEngine(self, hashid): for (k, e) in self.search_engines.items(): @@ -22,7 +23,7 @@ class IWLAPostAnalysisReferers(IPlugin): domain_name = self.iwla.getConfValue('domain_name', '') if not domain_name: - print 'domain_name required in conf' + print 'domain_name must not be empty !' return False self.own_domain_re = re.compile(r'.*%s.*' % (domain_name)) diff --git a/plugins/post_analysis/reverse_dns.py b/plugins/post_analysis/reverse_dns.py index e638a45..9ffb8ab 100644 --- a/plugins/post_analysis/reverse_dns.py +++ b/plugins/post_analysis/reverse_dns.py @@ -4,12 +4,15 @@ from iwla import IWLA from iplugin import IPlugin class IWLAPostAnalysisReverseDNS(IPlugin): + DEFAULT_DNS_TIMEOUT = 0.5 + def __init__(self, iwla): super(IWLAPostAnalysisReverseDNS, self).__init__(iwla) self.API_VERSION = 1 def load(self): - timeout = self.iwla.getConfValue('reverse_dns_timeout', 0.5) + timeout = self.iwla.getConfValue('reverse_dns_timeout', + IWLAPostAnalysisReverseDNS.DEFAULT_DNS_TIMEOUT) socket.setdefaulttimeout(timeout) return True diff --git a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index 936cf63..2ecbc9c 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -10,6 +10,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): def __init__(self, iwla): super(IWLAPreAnalysisPageToHit, self).__init__(iwla) self.API_VERSION = 1 + self.conf_requires = ['viewed_http_codes'] def load(self): # Remove logo from indefero @@ -21,7 +22,8 @@ class IWLAPreAnalysisPageToHit(IPlugin): def hook(self): hits = self.iwla.getCurrentVisists() - viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304]) + viewed_http_codes = self.iwla.getConfValue('viewed_http_codes') + for (k, super_hit) in hits.items(): if super_hit['robot']: continue @@ -31,6 +33,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): uri = request['extract_request']['extract_uri'] for regexp in self.regexps: if regexp.match(uri): + #print '%s is an hit' % uri request['is_page'] = False super_hit['viewed_pages'] -= 1 super_hit['viewed_hits'] += 1