From 751a9b3fae17a5caf76933e65415573e0b346b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9gory=20Soutad=C3=A9?= Date: Tue, 9 Dec 2014 16:54:02 +0100 Subject: [PATCH] Start big comments (post analysis / referers) --- display.py | 8 +++ iplugin.py | 4 ++ iwla.py | 83 +++++++++++++++++++++++++++-- plugins/post_analysis/referers.py | 26 +++++++++ plugins/pre_analysis/page_to_hit.py | 25 ++++++++- plugins/pre_analysis/robots.py | 26 +++++++++ 6 files changed, 168 insertions(+), 4 deletions(-) diff --git a/display.py b/display.py index 5e8dbd4..25cb0a2 100644 --- a/display.py +++ b/display.py @@ -1,6 +1,10 @@ import os import codecs +# +# Create output HTML files +# + class DisplayHTMLRaw(object): def __init__(self, iwla, html=u''): @@ -310,6 +314,10 @@ class DisplayHTMLBuild(object): for page in self.pages: page.build(root) +# +# Global functions +# + def bytesToStr(bytes): suffixes = [u'', u' kB', u' MB', u' GB', u' TB'] diff --git a/iplugin.py b/iplugin.py index 1617ebe..0ba739a 100644 --- a/iplugin.py +++ b/iplugin.py @@ -2,6 +2,10 @@ import importlib import inspect import traceback +# +# IWLA Plugin interface +# + class IPlugin(object): def __init__(self, iwla): diff --git a/iwla.py b/iwla.py index 6ddf237..1566dd5 100755 --- a/iwla.py +++ b/iwla.py @@ -20,6 +20,84 @@ del _ from iplugin import * from display import * +# +# Main class IWLA +# Parse logs, compute statistics, call plugins and produce output +# For now, only HTTP logs are valid +# +# Plugin requirements : None +# +# Conf values needed : +# analyzed_filename +# domain_name +# +# Output files : +# DB_ROOT/meta.db +# DB_ROOT/year/month/iwla.db +# OUTPUT_ROOT/index.html +# OUTPUT_ROOT/year/month/index.html +# +# Statistics creation : +# +# meta => +# last_time +# start_analysis_time +# stats => +# year => +# month => +# viewed_bandwidth +# not_viewed_bandwidth +# viewed_pages +# viewed_hits +# nb_visitors +# +# month_stats : +# viewed_bandwidth +# not_viewed_bandwidth +# viewed_pages +# viewed_hits +#
nb_visitors +# +# days_stats : +# day => +# viewed_bandwidth +# not_viewed_bandwidth +# viewed_pages +# viewed_hits +# nb_visitors +# +# visits : +# remote_addr => +# remote_addr +# remote_ip +# viewed_pages +# viewed_hits +# not_viewed_pages +# not_viewed_hits +# bandwidth +# last_access +# requests => +# [fields_from_format_log] +# extract_request => +# extract_uri +# extract_parameters* +# extract_referer* => +# extract_uri +# extract_parameters* +# robot +# hit_only +# is_page +# +# valid_visitors: +# month_stats without robot and hit only visitors (if not conf.count_hit_only_visitors) +# +# Statistics update : +# None +# +# Statistics deletion : +# None +# + class IWLA(object): ANALYSIS_CLASS = 'HTTP' @@ -105,7 +183,8 @@ class IWLA(object): def _clearMeta(self): self.meta_infos = { - 'last_time' : None + 'last_time' : None, + 'start_analysis_time' : None } return self.meta_infos @@ -163,7 +242,6 @@ class IWLA(object): if not remote_addr in self.current_analysis['visits'].keys(): self._createVisitor(hit) - return super_hit = self.current_analysis['visits'][remote_addr] super_hit['requests'].append(hit) @@ -206,7 +284,6 @@ class IWLA(object): super_hit['requests'] = [] super_hit['robot'] = False super_hit['hit_only'] = 0 - self._appendHit(hit) def _decodeHTTPRequest(self, hit): if not 'request' in hit.keys(): return False diff --git a/plugins/post_analysis/referers.py b/plugins/post_analysis/referers.py index eb6fe3e..d689aa5 100644 --- a/plugins/post_analysis/referers.py +++ b/plugins/post_analysis/referers.py @@ -6,6 +6,32 @@ from iplugin import IPlugin import awstats_data +# +# Post analysis hook +# +# Extract referers and key phrases from requests +# +# Plugin requirements : None +# +# Conf values needed : +# page_to_hit_conf* +# hit_to_page_conf* +# +# Output files : +# None +# +# Statistics creation : +# None +# +# Statistics update : +# visits : +# remote_addr => +# robot +# +# Statistics deletion : +# None +# + class IWLAPostAnalysisReferers(IPlugin): 
def __init__(self, iwla): super(IWLAPostAnalysisReferers, self).__init__(iwla) diff --git a/plugins/pre_analysis/page_to_hit.py b/plugins/pre_analysis/page_to_hit.py index 51102c8..ff05f0a 100644 --- a/plugins/pre_analysis/page_to_hit.py +++ b/plugins/pre_analysis/page_to_hit.py @@ -3,7 +3,30 @@ import re from iwla import IWLA from iplugin import IPlugin -# Basic rule to detect robots +# +# Pre analysis hook +# Change pages into hits and hits into pages in statistics +# +# Plugin requirements : None +# +# Conf values needed : +# page_to_hit_conf* +# hit_to_page_conf* +# +# Output files : +# None +# +# Statistics creation : +# None +# +# Statistics update : +# visits : +# remote_addr => +# is_page +# +# Statistics deletion : +# None +# class IWLAPreAnalysisPageToHit(IPlugin): diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 0557448..e2db3aa 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -5,6 +5,32 @@ from iplugin import IPlugin import awstats_data +# +# Pre analysis hook +# +# Filter robots +# +# Plugin requirements : None +# +# Conf values needed : +# page_to_hit_conf* +# hit_to_page_conf* +# +# Output files : +# None +# +# Statistics creation : +# None +# +# Statistics update : +# visits : +# remote_addr => +# robot +# +# Statistics deletion : +# None +# + class IWLAPreAnalysisRobots(IPlugin): def __init__(self, iwla): super(IWLAPreAnalysisRobots, self).__init__(iwla)