diff --git a/conf.py b/conf.py index 76981bb..e491559 100644 --- a/conf.py +++ b/conf.py @@ -1,7 +1,7 @@ log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\ - '"$request" $status $body_bytes_sent ' +\ - '"$http_referer" "$http_user_agent"'; + '"$request" $status $body_bytes_sent ' +\ + '"$http_referer" "$http_user_agent"'; #09/Nov/2014:06:35:16 +0100 time_format = '%d/%b/%Y:%H:%M:%S +0100' @@ -15,6 +15,7 @@ pre_analysis_hooks = ['soutade', 'robots'] post_analysis_hooks = ['top_visitors', 'reverse_dns'] display_hooks = ['top_visitors'] +reverse_dns_timeout = 0.2 # pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py'] # post_analysis_hooks = ['top_visitors.py'] # display_hooks = ['top_visitors.py'] diff --git a/iplugin.py b/iplugin.py index e2701ae..ce0235d 100644 --- a/iplugin.py +++ b/iplugin.py @@ -2,10 +2,16 @@ import importlib import inspect import traceback +import default_conf as conf +import conf as _ +conf.__dict__.update(_.__dict__) +del _ + class IPlugin(object): - def __init__(self, iwla): + def __init__(self, iwla, conf): self.iwla = iwla + self.conf = conf self.requires = [] self.API_VERSION = 1 self.ANALYSIS_CLASS = 'HTTP' @@ -19,6 +25,12 @@ class IPlugin(object): return True + def getConfValue(self, key, default): + if not key in dir(self.conf): + return default + else: + return self.conf.__dict__[key] + def getRequirements(self): return self.requires @@ -46,7 +58,7 @@ def preloadPlugins(plugins, iwla): print 'No plugin defined in %s' % (plugin_path) continue - plugin = classes[0](iwla) + plugin = classes[0](iwla, conf) plugin_name = plugin.__class__.__name__ if not plugin.isValid(iwla.ANALYSIS_CLASS, iwla.API_VERSION): diff --git a/iwla.py b/iwla.py index 5c69a6e..45d8060 100755 --- a/iwla.py +++ b/iwla.py @@ -10,12 +10,14 @@ import pickle import gzip import importlib +import default_conf as conf +import conf as _ +conf.__dict__.update(_.__dict__) +del _ + from iplugin import * from display import * -from default_conf import * -from conf import * - class IWLA(object): ANALYSIS_CLASS = 'HTTP' @@ -31,36 +33,14 @@ class IWLA(object): self.display = DisplayHTMLBuild() self.valid_visitors = None - self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format) + self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', conf.log_format) self.log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', self.log_format_extracted) self.http_request_extracted = re.compile(r'(?P\S+) (?P\S+) (?P\S+)') self.log_re = re.compile(self.log_format_extracted) self.uri_re = re.compile(r'(?P[^\?]*)[\?(?P.*)]?') - self.plugins = {PRE_HOOK_DIRECTORY : pre_analysis_hooks, - POST_HOOK_DIRECTORY : post_analysis_hooks, - DISPLAY_HOOK_DIRECTORY : display_hooks} - - def _preloadPlugins(self): - self.cache_plugins = preloadPlugins(self.plugins, self) - return - ret = True - for root in self.plugins.keys(): - for plugin_name in self.plugins[root]: - p = root + '.' + plugin_name - try: - self.cache_plugins[p] = importlib.import_module(p) - mod = self.cache_plugins[p] - infos = mod.get_plugins_infos() - if infos['class'] != IWLA.ANALYSIS_CLASS or \ - IWLA.API_VERSION < infos['min_version'] or\ - (infos['max_version'] != -1 and (IWLA.API_VERSION > infos['max_version'])): - del self.cache_plugins[p] - elif not mod.load(): - del self.cache_plugins[p] - except Exception as e: - print 'Error loading \'%s\' => %s' % (p, e) - ret = False - return ret + self.plugins = {conf.PRE_HOOK_DIRECTORY : conf.pre_analysis_hooks, + conf.POST_HOOK_DIRECTORY : conf.post_analysis_hooks, + conf.DISPLAY_HOOK_DIRECTORY : conf.display_hooks} def _clearVisits(self): self.current_analysis = { @@ -97,7 +77,7 @@ class IWLA(object): return self.display def getDBFilename(self, time): - return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME) + return (conf.DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, conf.DB_FILENAME) def _serialize(self, obj, filename): base = os.path.dirname(filename) @@ -105,7 +85,7 @@ class IWLA(object): os.makedirs(base) # TODO : remove return - return + #return with open(filename + '.tmp', 'wb+') as f: pickle.dump(obj, f) @@ -130,7 +110,7 @@ class IWLA(object): mod.hook(*args) def isPage(self, request): - for e in pages_extensions: + for e in conf.pages_extensions: if request.endswith(e): return True @@ -162,7 +142,7 @@ class IWLA(object): if status >= 300 and status < 400: return if super_hit['robot'] or\ - not status in viewed_http_codes: + not status in conf.viewed_http_codes: page_key = 'not_viewed_pages' hit_key = 'not_viewed_hits' else: @@ -211,7 +191,7 @@ class IWLA(object): return True def _decodeTime(self, hit): - hit['time_decoded'] = time.strptime(hit['time_local'], time_format) + hit['time_decoded'] = time.strptime(hit['time_local'], conf.time_format) def getDisplayIndex(self): cur_time = self.meta_infos['last_time'] @@ -261,8 +241,8 @@ class IWLA(object): def _generateDisplay(self): self._generateDisplayDaysStat() - self._callPlugins(DISPLAY_HOOK_DIRECTORY, self) - self.display.build(DISPLAY_ROOT) + self._callPlugins(conf.DISPLAY_HOOK_DIRECTORY, self) + self.display.build(conf.DISPLAY_ROOT) def _generateStats(self, visits): stats = {} @@ -308,7 +288,7 @@ class IWLA(object): self.current_analysis['month_stats'] = stats self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']} - self._callPlugins(POST_HOOK_DIRECTORY, self) + self._callPlugins(conf.POST_HOOK_DIRECTORY, self) path = self.getDBFilename(cur_time) if os.path.exists(path): @@ -323,7 +303,7 @@ class IWLA(object): def _generateDayStats(self): visits = self.current_analysis['visits'] - self._callPlugins(PRE_HOOK_DIRECTORY, self) + self._callPlugins(conf.PRE_HOOK_DIRECTORY, self) stats = self._generateStats(visits) @@ -382,17 +362,17 @@ class IWLA(object): return True def start(self): - self._preloadPlugins() + self.cache_plugins = preloadPlugins(self.plugins, self) print '==> Analysing log' - self.meta_infos = self._deserialize(META_PATH) or self._clearMeta() + self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta() if self.meta_infos['last_time']: self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits() else: self._clearVisits() - with open(analyzed_filename) as f: + with open(conf.analyzed_filename) as f: for l in f: # print "line " + l @@ -408,7 +388,7 @@ class IWLA(object): if self.analyse_started: self._generateDayStats() self._generateMonthStats() - self._serialize(self.meta_infos, META_PATH) + self._serialize(self.meta_infos, conf.META_PATH) else: print '==> Analyse not started : nothing to do' self._generateMonthStats() diff --git a/plugins/display/top_visitors.py b/plugins/display/top_visitors.py index 93f455a..6e9acda 100644 --- a/plugins/display/top_visitors.py +++ b/plugins/display/top_visitors.py @@ -5,8 +5,8 @@ from iplugin import IPlugin from display import * class IWLADisplayTopVisitors(IPlugin): - def __init__(self, iwla): - super(IWLADisplayTopVisitors, self).__init__(iwla) + def __init__(self, iwla, conf): + super(IWLADisplayTopVisitors, self).__init__(iwla, conf) self.API_VERSION = 1 self.requires = ['IWLAPostAnalysisTopVisitors'] diff --git a/plugins/post_analysis/reverse_dns.py b/plugins/post_analysis/reverse_dns.py index 10b3903..14cd434 100644 --- a/plugins/post_analysis/reverse_dns.py +++ b/plugins/post_analysis/reverse_dns.py @@ -1,11 +1,18 @@ +import socket + from iwla import IWLA from iplugin import IPlugin class IWLAPostAnalysisReverseDNS(IPlugin): - def __init__(self, iwla): - super(IWLAPostAnalysisReverseDNS, self).__init__(iwla) + def __init__(self, iwla, conf): + super(IWLAPostAnalysisReverseDNS, self).__init__(iwla, conf) self.API_VERSION = 1 + def load(self): + timeout = self.getConfValue('reverse_dns_timeout', 0.5) + socket.setdefaulttimeout(timeout) + return True + def hook(self, iwla): hits = iwla.getValidVisitors() for (k, hit) in hits.items(): diff --git a/plugins/post_analysis/top_visitors.py b/plugins/post_analysis/top_visitors.py index c7de05b..525a9cd 100644 --- a/plugins/post_analysis/top_visitors.py +++ b/plugins/post_analysis/top_visitors.py @@ -2,8 +2,8 @@ from iwla import IWLA from iplugin import IPlugin class IWLAPostAnalysisTopVisitors(IPlugin): - def __init__(self, iwla): - super(IWLAPostAnalysisTopVisitors, self).__init__(iwla) + def __init__(self, iwla, conf): + super(IWLAPostAnalysisTopVisitors, self).__init__(iwla, conf) self.API_VERSION = 1 def hook(self, iwla): diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 596552e..fdafc44 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -6,8 +6,8 @@ from iplugin import IPlugin from awstats_robots_data import awstats_robots class IWLAPreAnalysisRobots(IPlugin): - def __init__(self, iwla): - super(IWLAPreAnalysisRobots, self).__init__(iwla) + def __init__(self, iwla, conf): + super(IWLAPreAnalysisRobots, self).__init__(iwla, conf) self.API_VERSION = 1 def load(self): diff --git a/plugins/pre_analysis/soutade.py b/plugins/pre_analysis/soutade.py index 0ec4e69..5113ad6 100644 --- a/plugins/pre_analysis/soutade.py +++ b/plugins/pre_analysis/soutade.py @@ -7,8 +7,8 @@ from iplugin import IPlugin class IWLAPreAnalysisSoutade(IPlugin): - def __init__(self, iwla): - super(IWLAPreAnalysisSoutade, self).__init__(iwla) + def __init__(self, iwla, conf): + super(IWLAPreAnalysisSoutade, self).__init__(iwla, conf) self.API_VERSION = 1 def load(self):