Start big comments (post analysis / referers)
This commit is contained in:
parent
43e5e97c5a
commit
751a9b3fae
|
@ -1,6 +1,10 @@
|
||||||
import os
|
import os
|
||||||
import codecs
|
import codecs
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create output HTML files
|
||||||
|
#
|
||||||
|
|
||||||
class DisplayHTMLRaw(object):
|
class DisplayHTMLRaw(object):
|
||||||
|
|
||||||
def __init__(self, iwla, html=u''):
|
def __init__(self, iwla, html=u''):
|
||||||
|
@ -310,6 +314,10 @@ class DisplayHTMLBuild(object):
|
||||||
for page in self.pages:
|
for page in self.pages:
|
||||||
page.build(root)
|
page.build(root)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Global functions
|
||||||
|
#
|
||||||
|
|
||||||
def bytesToStr(bytes):
|
def bytesToStr(bytes):
|
||||||
suffixes = [u'', u' kB', u' MB', u' GB', u' TB']
|
suffixes = [u'', u' kB', u' MB', u' GB', u' TB']
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,10 @@ import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
|
#
|
||||||
|
# IWLA Plugin interface
|
||||||
|
#
|
||||||
|
|
||||||
class IPlugin(object):
|
class IPlugin(object):
|
||||||
|
|
||||||
def __init__(self, iwla):
|
def __init__(self, iwla):
|
||||||
|
|
83
iwla.py
83
iwla.py
|
@ -20,6 +20,84 @@ del _
|
||||||
from iplugin import *
|
from iplugin import *
|
||||||
from display import *
|
from display import *
|
||||||
|
|
||||||
|
#
|
||||||
|
# Main class IWLA
|
||||||
|
# Parse logs, compute statistics, call plugins and produce output
|
||||||
|
# For now, only HTTP logs are supported
|
||||||
|
#
|
||||||
|
# Plugin requirements : None
|
||||||
|
#
|
||||||
|
# Conf values needed :
|
||||||
|
# analyzed_filename
|
||||||
|
# domain_name
|
||||||
|
#
|
||||||
|
# Output files :
|
||||||
|
# DB_ROOT/meta.db
|
||||||
|
# DB_ROOT/year/month/iwla.db
|
||||||
|
# OUTPUT_ROOT/index.html
|
||||||
|
# OUTPUT_ROOT/year/month/index.html
|
||||||
|
#
|
||||||
|
# Statistics creation :
|
||||||
|
#
|
||||||
|
# meta =>
|
||||||
|
# last_time
|
||||||
|
# start_analysis_time
|
||||||
|
# stats =>
|
||||||
|
# year =>
|
||||||
|
# month =>
|
||||||
|
# viewed_bandwidth
|
||||||
|
# not_viewed_bandwidth
|
||||||
|
# viewed_pages
|
||||||
|
# viewed_hits
|
||||||
|
# nb_visitors
|
||||||
|
#
|
||||||
|
# month_stats :
|
||||||
|
# viewed_bandwidth
|
||||||
|
# not_viewed_bandwidth
|
||||||
|
# viewed_pages
|
||||||
|
# viewed_hits
|
||||||
|
# nb_visitors
|
||||||
|
#
|
||||||
|
# days_stats :
|
||||||
|
# day =>
|
||||||
|
# viewed_bandwidth
|
||||||
|
# not_viewed_bandwidth
|
||||||
|
# viewed_pages
|
||||||
|
# viewed_hits
|
||||||
|
# nb_visitors
|
||||||
|
#
|
||||||
|
# visits :
|
||||||
|
# remote_addr =>
|
||||||
|
# remote_addr
|
||||||
|
# remote_ip
|
||||||
|
# viewed_pages
|
||||||
|
# viewed_hits
|
||||||
|
# not_viewed_pages
|
||||||
|
# not_viewed_hits
|
||||||
|
# bandwidth
|
||||||
|
# last_access
|
||||||
|
# requests =>
|
||||||
|
# [fields_from_format_log]
|
||||||
|
# extract_request =>
|
||||||
|
# extract_uri
|
||||||
|
# extract_parameters*
|
||||||
|
# extract_referer* =>
|
||||||
|
# extract_uri
|
||||||
|
# extract_parameters*
|
||||||
|
# robot
|
||||||
|
# hit_only
|
||||||
|
# is_page
|
||||||
|
#
|
||||||
|
# valid_visitors:
|
||||||
|
# month_stats without robots and hit-only visitors (if not conf.count_hit_only_visitors)
|
||||||
|
#
|
||||||
|
# Statistics update :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics deletion :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
|
||||||
class IWLA(object):
|
class IWLA(object):
|
||||||
|
|
||||||
ANALYSIS_CLASS = 'HTTP'
|
ANALYSIS_CLASS = 'HTTP'
|
||||||
|
@ -105,7 +183,8 @@ class IWLA(object):
|
||||||
|
|
||||||
def _clearMeta(self):
|
def _clearMeta(self):
|
||||||
self.meta_infos = {
|
self.meta_infos = {
|
||||||
'last_time' : None
|
'last_time' : None,
|
||||||
|
'start_analysis_time' : None
|
||||||
}
|
}
|
||||||
return self.meta_infos
|
return self.meta_infos
|
||||||
|
|
||||||
|
@ -163,7 +242,6 @@ class IWLA(object):
|
||||||
|
|
||||||
if not remote_addr in self.current_analysis['visits'].keys():
|
if not remote_addr in self.current_analysis['visits'].keys():
|
||||||
self._createVisitor(hit)
|
self._createVisitor(hit)
|
||||||
return
|
|
||||||
|
|
||||||
super_hit = self.current_analysis['visits'][remote_addr]
|
super_hit = self.current_analysis['visits'][remote_addr]
|
||||||
super_hit['requests'].append(hit)
|
super_hit['requests'].append(hit)
|
||||||
|
@ -206,7 +284,6 @@ class IWLA(object):
|
||||||
super_hit['requests'] = []
|
super_hit['requests'] = []
|
||||||
super_hit['robot'] = False
|
super_hit['robot'] = False
|
||||||
super_hit['hit_only'] = 0
|
super_hit['hit_only'] = 0
|
||||||
self._appendHit(hit)
|
|
||||||
|
|
||||||
def _decodeHTTPRequest(self, hit):
|
def _decodeHTTPRequest(self, hit):
|
||||||
if not 'request' in hit.keys(): return False
|
if not 'request' in hit.keys(): return False
|
||||||
|
|
|
@ -6,6 +6,32 @@ from iplugin import IPlugin
|
||||||
|
|
||||||
import awstats_data
|
import awstats_data
|
||||||
|
|
||||||
|
#
|
||||||
|
# Post analysis hook
|
||||||
|
#
|
||||||
|
# Extract referers and key phrases from requests
|
||||||
|
#
|
||||||
|
# Plugin requirements : None
|
||||||
|
#
|
||||||
|
# Conf values needed :
|
||||||
|
# page_to_hit_conf*
|
||||||
|
# hit_to_page_conf*
|
||||||
|
#
|
||||||
|
# Output files :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics creation :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics update :
|
||||||
|
# visits :
|
||||||
|
# remote_addr =>
|
||||||
|
# robot
|
||||||
|
#
|
||||||
|
# Statistics deletion :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
|
||||||
class IWLAPostAnalysisReferers(IPlugin):
|
class IWLAPostAnalysisReferers(IPlugin):
|
||||||
def __init__(self, iwla):
|
def __init__(self, iwla):
|
||||||
super(IWLAPostAnalysisReferers, self).__init__(iwla)
|
super(IWLAPostAnalysisReferers, self).__init__(iwla)
|
||||||
|
|
|
@ -3,7 +3,30 @@ import re
|
||||||
from iwla import IWLA
|
from iwla import IWLA
|
||||||
from iplugin import IPlugin
|
from iplugin import IPlugin
|
||||||
|
|
||||||
# Basic rule to detect robots
|
#
|
||||||
|
# Pre analysis hook
|
||||||
|
# Change pages into hits and hits into pages in statistics
|
||||||
|
#
|
||||||
|
# Plugin requirements : None
|
||||||
|
#
|
||||||
|
# Conf values needed :
|
||||||
|
# page_to_hit_conf*
|
||||||
|
# hit_to_page_conf*
|
||||||
|
#
|
||||||
|
# Output files :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics creation :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics update :
|
||||||
|
# visits :
|
||||||
|
# remote_addr =>
|
||||||
|
# is_page
|
||||||
|
#
|
||||||
|
# Statistics deletion :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
|
||||||
class IWLAPreAnalysisPageToHit(IPlugin):
|
class IWLAPreAnalysisPageToHit(IPlugin):
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,32 @@ from iplugin import IPlugin
|
||||||
|
|
||||||
import awstats_data
|
import awstats_data
|
||||||
|
|
||||||
|
#
|
||||||
|
# Pre analysis hook
|
||||||
|
#
|
||||||
|
# Filter robots
|
||||||
|
#
|
||||||
|
# Plugin requirements : None
|
||||||
|
#
|
||||||
|
# Conf values needed :
|
||||||
|
# page_to_hit_conf*
|
||||||
|
# hit_to_page_conf*
|
||||||
|
#
|
||||||
|
# Output files :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics creation :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
# Statistics update :
|
||||||
|
# visits :
|
||||||
|
# remote_addr =>
|
||||||
|
# robot
|
||||||
|
#
|
||||||
|
# Statistics deletion :
|
||||||
|
# None
|
||||||
|
#
|
||||||
|
|
||||||
class IWLAPreAnalysisRobots(IPlugin):
|
class IWLAPreAnalysisRobots(IPlugin):
|
||||||
def __init__(self, iwla):
|
def __init__(self, iwla):
|
||||||
super(IWLAPreAnalysisRobots, self).__init__(iwla)
|
super(IWLAPreAnalysisRobots, self).__init__(iwla)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user