Merge branch 'logging'

Grégory Soutadé 2014-12-16 20:24:56 +01:00
commit b4fc831f06
4 changed files with 48 additions and 39 deletions
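
Note: the merge replaces the ad-hoc print statements with Python's standard logging module: one basicConfig() call at start-up and a named logger per class or module. A minimal, self-contained sketch of that pattern (the Analyzer class below is hypothetical; only the format string is taken from the diff):

import logging

# Configure the root logger once, at start-up; every logger created
# afterwards inherits this format and threshold.
logging.basicConfig(format='%(name)s %(message)s', level=logging.INFO)

class Analyzer(object):  # hypothetical class, for illustration only
    def __init__(self):
        # One logger per class, named after the class itself
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info('==> Start')

    def work(self):
        self.logger.debug('hidden at the default INFO threshold')
        self.logger.warning('shown at the INFO threshold and below')

if __name__ == '__main__':
    Analyzer().work()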

TODO

@@ -6,5 +6,4 @@ Limit hits/pages/downloads by rate
 Automatic tests
 Add Licence
 Free memory as soon as possible
 gzip output files
-different debug output levels


@@ -1,6 +1,7 @@
 import os
 import codecs
 import time
+import logging
 
 #
 # Create output HTML files
@@ -22,14 +23,8 @@ class DisplayHTMLRaw(object):
         if html: f.write(html)
 
     def build(self, f):
-        # t1 = time.time()
         self._buildHTML()
-        # t2 = time.time()
-        # print 'Time for _buildHTML : %d seconds' % (t2-t1)
-        # t1 = time.time()
         self._build(f, self.html)
-        # t2 = time.time()
-        # print 'Time for _build : %d seconds' % (t2-t1)
 
 
 class DisplayHTMLBlock(DisplayHTMLRaw):
@@ -252,6 +247,7 @@ class DisplayHTMLPage(object):
         self.filename = filename
         self.blocks = []
         self.css_path = listToStr(css_path)
+        self.logger = logging.getLogger(self.__class__.__name__)
 
     def getFilename(self):
         return self.filename;
@@ -272,6 +268,8 @@
         if not os.path.exists(base):
             os.makedirs(base)
 
+        self.logger.debug('Write %s' % (filename))
+
         f = codecs.open(filename, 'w', 'utf-8')
         f.write(u'<!DOCTYPE html>')
         f.write(u'<html>')
@@ -321,9 +319,7 @@ class DisplayHTMLBuild(object):
                 os.symlink(target, link_name)
 
         for page in self.pages:
-            # print 'Build %s' % (page.filename)
             page.build(root)
-        # print 'Built'
 
 #
 # Global functions
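
Note: the DisplayHTMLPage hunks hand pre-formatted strings to logger.debug(), as in 'Write %s' % (filename). The logging API can also defer that interpolation until the record is known to be emitted; a small sketch of both forms (the logger name and filename below are placeholders):

import logging

logging.basicConfig(format='%(name)s %(message)s', level=logging.DEBUG)
logger = logging.getLogger('DisplayHTMLPage')

filename = 'output/index.html'  # placeholder value

# Eager formatting, as in the diff: the string is built before the call,
# even if the record would then be filtered out.
logger.debug('Write %s' % (filename))

# Lazy formatting: logging interpolates the arguments only if the
# record is actually emitted.
logger.debug('Write %s', filename)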


@@ -1,6 +1,6 @@
 import importlib
 import inspect
-import traceback
+import logging
 
 #
 # IWLA Plugin interface
@@ -47,7 +47,9 @@ def validConfRequirements(conf_requirements, iwla, plugin_path):
 def preloadPlugins(plugins, iwla):
     cache_plugins = {}
 
-    print "==> Preload plugins"
+    logger = logging.getLogger(__name__)
+
+    logger.info("==> Preload plugins")
 
     for (root, plugins_filenames) in plugins:
         for plugin_filename in plugins_filenames:
@@ -61,7 +63,7 @@
                 ]
 
                 if not classes:
-                    print 'No plugin defined in %s' % (plugin_path)
+                    logger.warning('No plugin defined in %s' % (plugin_path))
                     continue
 
                 plugin = classes[0](iwla)
@@ -86,18 +88,17 @@ def preloadPlugins(plugins, iwla):
                             requirement_validated = True
                             break
                     if not requirement_validated:
-                        print 'Missing requirements \'%s\' for plugin %s' % (r, plugin_path)
+                        logger.error('Missing requirements \'%s\' for plugin %s' % (r, plugin_path))
                         break
                 if requirements and not requirement_validated: continue
 
                 if not plugin.load():
-                    print 'Plugin %s load failed' % (plugin_path)
+                    logger.error('Plugin %s load failed' % (plugin_path))
                     continue
 
-                print '\tRegister %s' % (plugin_path)
+                logger.info('\tRegister %s' % (plugin_path))
                 cache_plugins[plugin_path] = plugin
             except Exception as e:
-                print 'Error loading %s => %s' % (plugin_path, e)
-                traceback.print_exc()
+                logger.exception('Error loading %s => %s' % (plugin_path, e))
 
     return cache_plugins
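
Note: in the plugin loader, logger.exception() replaces the print / traceback.print_exc() pair. Called from inside an except block, it logs at ERROR level and appends the active traceback automatically. A standalone sketch (the plugin path and the forced ImportError are made up for the example):

import logging

logging.basicConfig(format='%(name)s %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

plugin_path = 'plugins.post_analysis.example'  # made-up path

try:
    raise ImportError('No module named example')  # stand-in for a failing import
except Exception as e:
    # ERROR-level message plus the current traceback, replacing the
    # old print + traceback.print_exc() pair.
    logger.exception('Error loading %s => %s' % (plugin_path, e))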

iwla.py

@@ -9,6 +9,7 @@ import pickle
 import gzip
 import importlib
 import argparse
+import logging
 
 from calendar import monthrange
 from datetime import date
@@ -108,9 +109,7 @@
     API_VERSION = 1
     IWLA_VERSION = '0.1'
 
-    def __init__(self):
-        print '==> Start'
-
+    def __init__(self, logLevel):
         self.meta_infos = {}
         self.analyse_started = False
         self.current_analysis = {}
@@ -127,6 +126,10 @@
                         (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
                         (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
 
+        logging.basicConfig(format='%(name)s %(message)s', level=logLevel)
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+        self.logger.info('==> Start')
 
     def getVersion(self):
         return IWLA.IWLA_VERSION
@@ -223,13 +226,13 @@
         return None
 
     def _callPlugins(self, target_root, *args):
-        print '==> Call plugins (%s)' % target_root
+        self.logger.info('==> Call plugins (%s)' % (target_root))
         for (root, plugins) in self.plugins:
             if root != target_root: continue
             for p in plugins:
                 mod = self.cache_plugins.get(root + '.' + p, None)
                 if mod:
-                    print '\t%s' % (p)
+                    self.logger.info('\t%s' % (p))
                     mod.hook(*args)
 
     def isPage(self, request):
@@ -299,7 +302,7 @@
             if 'extract_parameters' in d.keys():
                 hit['extract_request']['extract_parameters'] = d['extract_parameters']
         else:
-            print "Bad request extraction " + hit['request']
+            self.logger.warning("Bad request extraction %s" % (hit['request']))
             return False
 
         if hit['http_referer']:
@@ -337,7 +340,7 @@
         cur_time = self.meta_infos['last_time']
         title = 'Stats %d/%02d' % (cur_time.tm_year, cur_time.tm_mon)
         filename = self.getCurDisplayPath('index.html')
-        print '==> Generate display (%s)' % (filename)
+        self.logger.info('==> Generate display (%s)' % (filename))
 
         page = self.display.createPage(title, filename, conf.css_path)
         _, nb_month_days = monthrange(cur_time.tm_year, cur_time.tm_mon)
@@ -430,7 +433,8 @@
 
     def _generateDisplayWholeMonthStats(self):
         title = 'Stats for %s' % (conf.domain_name)
         filename = 'index.html'
-        print '==> Generate main page (%s)' % (filename)
+
+        self.logger.info('==> Generate main page (%s)' % (filename))
 
         page = self.display.createPage(title, filename, conf.css_path)
@@ -445,7 +449,9 @@
     def _compressFile(self, build_time, root, filename):
         path = os.path.join(root, filename)
         gz_path = path + '.gz'
-        #print 'Compress %s => %s' % (path, gz_path)
+
+        self.logger.debug('Compress %s => %s' % (path, gz_path))
+
         if not os.path.exists(gz_path) or\
            os.stat(path).st_mtime > build_time:
             with open(path, 'rb') as f_in:
@@ -492,8 +498,8 @@
         duplicated_stats = {k:v for (k,v) in stats.items()}
 
         cur_time = self.meta_infos['last_time']
-        print "== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon)
-        print stats
+        self.logger.info("== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon))
+        self.logger.info(stats)
 
         if not 'month_stats' in self.current_analysis.keys():
             self.current_analysis['month_stats'] = stats
@@ -517,7 +523,7 @@
         if os.path.exists(path):
             os.remove(path)
 
-        print "==> Serialize to %s" % path
+        self.logger.info("==> Serialize to %s" % (path))
         self._serialize(self.current_analysis, path)
 
         # Save month stats
@@ -561,9 +567,8 @@
            not super_hit['robot']:
             stats['nb_visits'] += 1
 
-        print "== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
-        print stats
-
+        self.logger.info("== Stats for %d/%02d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))
+        self.logger.info(stats)
 
         self.current_analysis['days_stats'][cur_time.tm_mday] = stats
@@ -601,12 +606,12 @@
         return True
 
     def start(self, _file):
-        print '==> Load previous database'
+        self.logger.info('==> Load previous database')
 
         self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
         if self.meta_infos['last_time']:
-            print 'Last time'
-            print self.meta_infos['last_time']
+            self.logger.info('Last time')
+            self.logger.info(self.meta_infos['last_time'])
             self.current_analysis = self._deserialize(self.getDBFilename(self.meta_infos['last_time'])) or self._clearVisits()
         else:
             self._clearVisits()
@@ -615,7 +620,7 @@
 
         self.cache_plugins = preloadPlugins(self.plugins, self)
 
-        print '==> Analysing log'
+        self.logger.info('==> Analysing log')
 
         for l in _file:
             # print "line " + l
@@ -626,7 +631,7 @@
                 if not self._newHit(groups.groupdict()):
                     continue
             else:
-                print "No match for " + l
+                self.logger.warning("No match for %s" % (l))
                 #break
 
         if self.analyse_started:
@@ -635,7 +640,7 @@
             del self.meta_infos['start_analysis_time']
             self._serialize(self.meta_infos, conf.META_PATH)
         else:
-            print '==> Analyse not started : nothing new'
+            self.logger.info('==> Analyse not started : nothing new')
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Intelligent Web Log Analyzer')
@@ -651,14 +656,22 @@
     parser.add_argument('-f', '--file', dest='file',
                         help='Analyse this log file')
 
+    parser.add_argument('-d', '--log-level', dest='loglevel',
+                        default='INFO', type=str,
+                        help='Loglevel in %s, default : %s' % (['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 'INFO'))
+
     args = parser.parse_args()
 
     if args.clean_output:
         if os.path.exists(conf.DB_ROOT): shutil.rmtree(conf.DB_ROOT)
         if os.path.exists(conf.DISPLAY_ROOT): shutil.rmtree(conf.DISPLAY_ROOT)
 
-    iwla = IWLA()
+    loglevel = getattr(logging, args.loglevel.upper(), None)
+    if not isinstance(loglevel, int):
+        raise ValueError('Invalid log level: %s' % (args.loglevel))
+
+    iwla = IWLA(loglevel)
 
     required_conf = ['analyzed_filename', 'domain_name']
     if not validConfRequirements(required_conf, iwla, 'Main Conf'):
        sys.exit(0)
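
Note: the new -d/--log-level option resolves the upper-cased string as an attribute of the logging module, so 'debug' becomes logging.DEBUG (10), while an unknown name yields None and is rejected before IWLA is built. The same lookup in isolation (argument list hard-coded here for demonstration):

import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument('-d', '--log-level', dest='loglevel',
                    default='INFO', type=str)
args = parser.parse_args(['-d', 'debug'])  # hard-coded sample arguments

# 'debug' -> logging.DEBUG (10); an unknown name gives None and is rejected.
loglevel = getattr(logging, args.loglevel.upper(), None)
if not isinstance(loglevel, int):
    raise ValueError('Invalid log level: %s' % (args.loglevel))

logging.basicConfig(format='%(name)s %(message)s', level=loglevel)
logging.getLogger('demo').debug('log level is %d' % (loglevel))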