Start big comments (post analysis / referers)

This commit is contained in:
Grégory Soutadé 2014-12-09 16:54:02 +01:00
parent 43e5e97c5a
commit 751a9b3fae
6 changed files with 168 additions and 4 deletions

View File

@ -1,6 +1,10 @@
import os import os
import codecs import codecs
#
# Create output HTML files
#
class DisplayHTMLRaw(object): class DisplayHTMLRaw(object):
def __init__(self, iwla, html=u''): def __init__(self, iwla, html=u''):
@ -310,6 +314,10 @@ class DisplayHTMLBuild(object):
for page in self.pages: for page in self.pages:
page.build(root) page.build(root)
#
# Global functions
#
def bytesToStr(bytes): def bytesToStr(bytes):
suffixes = [u'', u' kB', u' MB', u' GB', u' TB'] suffixes = [u'', u' kB', u' MB', u' GB', u' TB']

View File

@ -2,6 +2,10 @@ import importlib
import inspect import inspect
import traceback import traceback
#
# IWLA Plugin interface
#
class IPlugin(object): class IPlugin(object):
def __init__(self, iwla): def __init__(self, iwla):

83
iwla.py
View File

@ -20,6 +20,84 @@ del _
from iplugin import * from iplugin import *
from display import * from display import *
#
# Main class IWLA
# Parse logs, compute statistics, call plugins and produce output
# For now, only HTTP logs are supported
#
# Plugin requirements : None
#
# Conf values needed :
# analyzed_filename
# domain_name
#
# Output files :
# DB_ROOT/meta.db
# DB_ROOT/year/month/iwla.db
# OUTPUT_ROOT/index.html
# OUTPUT_ROOT/year/month/index.html
#
# Statistics creation :
#
# meta =>
# last_time
# start_analysis_time
# stats =>
# year =>
# month =>
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# month_stats :
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# days_stats :
# day =>
# viewed_bandwidth
# not_viewed_bandwidth
# viewed_pages
# viewed_hits
# nb_visitors
#
# visits :
# remote_addr =>
# remote_addr
# remote_ip
# viewed_pages
# viewed_hits
# not_viewed_pages
# not_viewed_hits
# bandwidth
# last_access
# requests =>
# [fields_from_format_log]
# extract_request =>
# extract_uri
# extract_parameters*
# extract_referer* =>
# extract_uri
# extract_parameters*
# robot
# hit_only
# is_page
#
# valid_visitors:
# month_stats without robots and hit-only visitors (if not conf.count_hit_only_visitors)
#
# Statistics update :
# None
#
# Statistics deletion :
# None
#
class IWLA(object): class IWLA(object):
ANALYSIS_CLASS = 'HTTP' ANALYSIS_CLASS = 'HTTP'
@ -105,7 +183,8 @@ class IWLA(object):
def _clearMeta(self): def _clearMeta(self):
self.meta_infos = { self.meta_infos = {
'last_time' : None 'last_time' : None,
'start_analysis_time' : None
} }
return self.meta_infos return self.meta_infos
@ -163,7 +242,6 @@ class IWLA(object):
if not remote_addr in self.current_analysis['visits'].keys(): if not remote_addr in self.current_analysis['visits'].keys():
self._createVisitor(hit) self._createVisitor(hit)
return
super_hit = self.current_analysis['visits'][remote_addr] super_hit = self.current_analysis['visits'][remote_addr]
super_hit['requests'].append(hit) super_hit['requests'].append(hit)
@ -206,7 +284,6 @@ class IWLA(object):
super_hit['requests'] = [] super_hit['requests'] = []
super_hit['robot'] = False super_hit['robot'] = False
super_hit['hit_only'] = 0 super_hit['hit_only'] = 0
self._appendHit(hit)
def _decodeHTTPRequest(self, hit): def _decodeHTTPRequest(self, hit):
if not 'request' in hit.keys(): return False if not 'request' in hit.keys(): return False

View File

@ -6,6 +6,32 @@ from iplugin import IPlugin
import awstats_data import awstats_data
#
# Post analysis hook
#
# Extract referers and key phrases from requests
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# robot
#
# Statistics deletion :
# None
#
class IWLAPostAnalysisReferers(IPlugin): class IWLAPostAnalysisReferers(IPlugin):
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPostAnalysisReferers, self).__init__(iwla) super(IWLAPostAnalysisReferers, self).__init__(iwla)

View File

@ -3,7 +3,30 @@ import re
from iwla import IWLA from iwla import IWLA
from iplugin import IPlugin from iplugin import IPlugin
# Basic rule to detect robots #
# Pre analysis hook
# Change pages into hits and hits into pages in statistics
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# is_page
#
# Statistics deletion :
# None
#
class IWLAPreAnalysisPageToHit(IPlugin): class IWLAPreAnalysisPageToHit(IPlugin):

View File

@ -5,6 +5,32 @@ from iplugin import IPlugin
import awstats_data import awstats_data
#
# Pre analysis hook
#
# Filter robots
#
# Plugin requirements : None
#
# Conf values needed :
# page_to_hit_conf*
# hit_to_page_conf*
#
# Output files :
# None
#
# Statistics creation :
# None
#
# Statistics update :
# visits :
# remote_addr =>
# robot
#
# Statistics deletion :
# None
#
class IWLAPreAnalysisRobots(IPlugin): class IWLAPreAnalysisRobots(IPlugin):
def __init__(self, iwla): def __init__(self, iwla):
super(IWLAPreAnalysisRobots, self).__init__(iwla) super(IWLAPreAnalysisRobots, self).__init__(iwla)