From e51e07f65e983dfaff29450e86e99b8b5bc88306 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Fri, 21 Nov 2014 16:56:58 +0100 Subject: [PATCH] Very nice result --- conf.py | 2 +- display.py | 111 ++++++++++++++++----------- iwla.py | 59 +++++++------- plugins/pre_analysis/H001_robot.py | 4 +- plugins/pre_analysis/H002_soutade.py | 5 +- 5 files changed, 106 insertions(+), 75 deletions(-) diff --git a/conf.py b/conf.py index 5a850e4..7bf2c3b 100644 --- a/conf.py +++ b/conf.py @@ -12,7 +12,7 @@ DB_ROOT = './output/' DISPLAY_ROOT = './output/' pre_analysis_hooks = ['H002_soutade', 'H001_robot'] -post_analysis_hooks = ['top_visitors'] +post_analysis_hooks = ['top_visitors', 'reverse_dns'] display_hooks = ['top_visitors'] # pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py'] diff --git a/display.py b/display.py index 4de1bd6..6ddf1f8 100644 --- a/display.py +++ b/display.py @@ -1,51 +1,70 @@ -def createPage(display, filename, title): - page = {} - page['title'] = title; - page['blocks'] = [] - display[filename] = page +class DisplayHTMLBlock(object): - return page + def __init__(self, title): + self.title = title -def appendBlockToPage(page, block): - page['blocks'].append(block) + def build(self, f): + pass -def createTable(title, cols): - table = {'type' : 'table', 'title' : title} - table['cols'] = cols - table['rows'] = [] - - return table - -def appendRowToTable(table, row): - table['rows'].append(row) - -def buildTable(block, f): - print 'Write table %s' % block['title'] - f.write('') - f.write('') - for title in block['cols']: - f.write('' % (title)) - f.write('') - for row in block['rows']: - f.write('') - for v in row: - f.write('' % (v)) - f.write('') - f.write('
%s
%s
') +class DisplayHTMLBlockTable(DisplayHTMLBlock): -def buildPages(display_root, display): - for filename in display.keys(): - page = display[filename] - print "OPEN %s" % (display_root + filename) - with open(display_root + filename, 'w') as f: - f.write('%s' % (page['title'])) - for block in page['blocks']: - print "Bluid block" - print block - print "End block" - if block['type'] == 'html': - f.write(block['value']) - elif block['type'] == 'table': - buildTable(block, f) - f.write('') + def __init__(self, title, cols): + super(DisplayHTMLBlockTable, self).__init__(title) + self.cols = cols + self.rows = [] + + def appendRow(self, row): + self.rows.append(row) + + def build(self, f): + f.write('') + f.write('') + for title in self.cols: + f.write('' % (title)) + f.write('') + for row in self.rows: + f.write('') + for v in row: + f.write('' % (v)) + f.write('') + f.write('
%s
%s
') + +class DisplayHTMLPage(object): + + def __init__(self, title, filename): + self.title = title + self.filename = filename + self.blocks = [] + + def getFilename(self): + return self.filename; + + def appendBlock(self, block): + self.blocks.append(block) + + def build(self, root): + f = open(root + self.filename, 'w') + f.write('%s' % (self.title)) + for block in self.blocks: + block.build(f) + f.write('') + f.close() + +class DisplayHTMLBuild(object): + + def __init__(self): + self.pages = [] + + def getPage(self, filename): + for page in self.pages: + if page.getFilename() == filename: + return page + return None + + def addPage(self, page): + self.pages.append(page) + + def build(self, root): + for page in self.pages: + page.build(root) diff --git a/iwla.py b/iwla.py index daf2b15..e8a93d7 100755 --- a/iwla.py +++ b/iwla.py @@ -25,7 +25,7 @@ class IWLA(object): self.analyse_started = False self.current_analysis = {} self.cache_plugins = {} - self.display = {} + self.display = DisplayHTMLBuild() self.valid_visitors = None self.log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format) @@ -44,7 +44,7 @@ class IWLA(object): p = root + '/' + plugin_name try: fp, pathname, description = imp.find_module(plugin_name, [root]) - self.cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description) + self.cache_plugins[p] = imp.load_module(p, fp, pathname, description) mod = self.cache_plugins[p] infos = mod.get_plugins_infos() if infos['class'] != IWLA.ANALYSIS_CLASS or \ @@ -70,14 +70,17 @@ class IWLA(object): def getDaysStats(self): return self.current_analysis['days_stats'] - def getMonthStatsStats(self): + def getMonthStats(self): return self.current_analysis['month_stats'] def getCurrentVisists(self): return self.current_analysis['visits'] def getValidVisitors(self): - return self.current_analysis['visits'] + return self.valid_visitors + + def getDisplay(self): + return self.display def _clearMeta(self): self.meta_infos = { @@ -86,7 +89,7 @@ class IWLA(object): return self.meta_infos def _clearDisplay(self): - self.display = {} + self.display = DisplayHTMLBuild() return self.display def getDBFilename(self, time): @@ -100,11 +103,11 @@ class IWLA(object): # TODO : remove return return - with open(filename + '.tmp', 'wb+') as f: - pickle.dump(obj, f) - f.seek(0) - with gzip.open(filename, 'w') as fzip: - fzip.write(f.read()) + with open(filename + '.tmp', 'wb+') as f: + pickle.dump(obj, f) + f.seek(0) + with gzip.open(filename, 'w') as fzip: + fzip.write(f.read()) os.remove(filename + '.tmp') def _deserialize(self, filename): @@ -210,15 +213,16 @@ class IWLA(object): cur_time = self.meta_infos['last_time'] filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon) - return self.display.get(filename, None) + return self.display.getPage(filename) def _generateDisplayDaysStat(self): cur_time = self.meta_infos['last_time'] title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year) filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon) - page = createPage(self.display, filename, title) + print '==> Generate display (%s)' % (filename) + page = DisplayHTMLPage(title, filename) - days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth']) + days = DisplayHTMLBlockTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Not viewed Bandwidth']) keys = self.current_analysis['days_stats'].keys() keys.sort() @@ -227,7 +231,7 @@ class IWLA(object): stats = self.current_analysis['days_stats'][k] row = [k, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']] row = map(lambda(v): str(v), row) - appendRowToTable(days, row) + days.appendRow(row) nb_visits += stats['nb_visitors'] stats = self.current_analysis['month_stats'] @@ -240,17 +244,18 @@ class IWLA(object): row = map(lambda(v): '0', row) row[0] = 'Average' - appendRowToTable(days, row) + days.appendRow(row) row = ['Total', nb_visits, stats['viewed_pages'], stats['viewed_hits'], stats['viewed_bandwidth'], stats['not_viewed_bandwidth']] row = map(lambda(v): str(v), row) - appendRowToTable(days, row) - appendBlockToPage(page, days) + days.appendRow(row) + page.appendBlock(days) + self.display.addPage(page) def _generateDisplay(self): self._generateDisplayDaysStat() - self._callPlugins(DISPLAY_HOOK_DIRECTORY, self.current_analysis, self.display) - buildPages(DISPLAY_ROOT, self.display) + self._callPlugins(DISPLAY_HOOK_DIRECTORY, self) + self.display.build(DISPLAY_ROOT) def _generateStats(self, visits): stats = {} @@ -293,11 +298,11 @@ class IWLA(object): print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon) print stats - self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']} - self._callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats) - self.current_analysis['month_stats'] = stats + self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']} + self._callPlugins(POST_HOOK_DIRECTORY, self) + path = self.getDBFilename(cur_time) if os.path.exists(path): os.remove(path) @@ -311,7 +316,7 @@ class IWLA(object): def _generateDayStats(self): visits = self.current_analysis['visits'] - self._callPlugins(PRE_HOOK_DIRECTORY, visits) + self._callPlugins(PRE_HOOK_DIRECTORY, self) stats = self._generateStats(visits) @@ -391,14 +396,16 @@ class IWLA(object): break else: print "No match for " + l + #break if self.analyse_started: self._generateDayStats() self._generateMonthStats() - self._serialize(meta_infos, META_PATH) + self._serialize(self.meta_infos, META_PATH) else: print '==> Analyse not started : nothing to do' self._generateMonthStats() -iwla = IWLA() -iwla.start() +if __name__ == '__main__': + iwla = IWLA() + iwla.start() diff --git a/plugins/pre_analysis/H001_robot.py b/plugins/pre_analysis/H001_robot.py index a096dc8..a299fa5 100644 --- a/plugins/pre_analysis/H001_robot.py +++ b/plugins/pre_analysis/H001_robot.py @@ -1,4 +1,5 @@ import re +from iwla import IWLA from awstats_robots_data import awstats_robots @@ -21,7 +22,8 @@ def load(): # Basic rule to detect robots -def hook(hits): +def hook(iwla): + hits = iwla.getCurrentVisists() for k in hits.keys(): super_hit = hits[k] diff --git a/plugins/pre_analysis/H002_soutade.py b/plugins/pre_analysis/H002_soutade.py index 5b70f64..b893715 100644 --- a/plugins/pre_analysis/H002_soutade.py +++ b/plugins/pre_analysis/H002_soutade.py @@ -1,4 +1,5 @@ import re +from iwla import IWLA # Remove logo from indefero logo_re = re.compile(r'^.+/logo/$') @@ -19,7 +20,9 @@ def load(): # Basic rule to detect robots -def hook(hits): +def hook(iwla): + hits = iwla.getCurrentVisists() + for k in hits.keys(): super_hit = hits[k]