Plugins OK
This commit is contained in:
parent
34aec57c46
commit
7dada493ab
8
conf.py
8
conf.py
|
@ -11,4 +11,10 @@ analyzed_filename = 'access.log'
|
||||||
DB_ROOT = './output/'
|
DB_ROOT = './output/'
|
||||||
DISPLAY_ROOT = './output/'
|
DISPLAY_ROOT = './output/'
|
||||||
|
|
||||||
pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
|
pre_analysis_hooks = ['H002_soutade', 'H001_robot']
|
||||||
|
post_analysis_hooks = ['top_visitors']
|
||||||
|
display_hooks = ['top_visitors']
|
||||||
|
|
||||||
|
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
|
||||||
|
# post_analysis_hooks = ['top_visitors.py']
|
||||||
|
# display_hooks = ['top_visitors.py']
|
||||||
|
|
37
display.py
37
display.py
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
def createPage(display, filename, title):
|
def createPage(display, filename, title):
|
||||||
page = {}
|
page = {}
|
||||||
page['title'] = title;
|
page['title'] = title;
|
||||||
|
@ -14,27 +15,37 @@ def createTable(title, cols):
|
||||||
table['cols'] = cols
|
table['cols'] = cols
|
||||||
table['rows'] = []
|
table['rows'] = []
|
||||||
|
|
||||||
|
return table
|
||||||
|
|
||||||
def appendRowToTable(table, row):
|
def appendRowToTable(table, row):
|
||||||
table['rows'].append(row)
|
table['rows'].append(row)
|
||||||
|
|
||||||
def buildPages(display):
|
def buildTable(block, f):
    """Write one table block to ``f`` as an HTML ``<table>``.

    Args:
        block: mapping providing 'title' (used only in the progress
            message), 'cols' (iterable of header cell values) and 'rows'
            (iterable of rows, each an iterable of cell values).
        f: object exposing ``write``; it receives the generated markup.

    Cell values are interpolated with ``%s`` and are NOT HTML-escaped.
    """
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print('Write table %s' % block['title'])

    parts = ['<table>', '<tr>']
    # Wrap each value in a one-element tuple so that a cell which is itself
    # a tuple is rendered via str() instead of being unpacked by '%'.
    parts.extend('<th>%s</th>' % (title,) for title in block['cols'])
    parts.append('</tr>')
    for row in block['rows']:
        parts.append('<tr>')
        parts.extend('<td>%s</td>' % (v,) for v in row)
        parts.append('</tr>')
    parts.append('</table>')

    # Single write of the assembled markup instead of one write per tag.
    f.write(''.join(parts))
|
||||||
|
|
||||||
|
def buildPages(display_root, display):
|
||||||
for filename in display.keys():
|
for filename in display.keys():
|
||||||
page = display[filename]
|
page = display[filename]
|
||||||
with open(DISPLAY_ROOT + filename, 'w') as f:
|
print "OPEN %s" % (display_root + filename)
|
||||||
|
with open(display_root + filename, 'w') as f:
|
||||||
f.write('<html><title>%s</title><body>' % (page['title']))
|
f.write('<html><title>%s</title><body>' % (page['title']))
|
||||||
for block in page['blocks']:
|
for block in page['blocks']:
|
||||||
|
print "Bluid block"
|
||||||
|
print block
|
||||||
|
print "End block"
|
||||||
if block['type'] == 'html':
|
if block['type'] == 'html':
|
||||||
f.write(block['value'])
|
f.write(block['value'])
|
||||||
elif block['type'] == 'table':
|
elif block['type'] == 'table':
|
||||||
f.write('<table>')
|
buildTable(block, f)
|
||||||
f.write('<tr>')
|
|
||||||
for title in block['cols']:
|
|
||||||
f.write('<th>%s</th>' % (title))
|
|
||||||
f.write('</tr>')
|
|
||||||
for row in block['rows']:
|
|
||||||
f.write('<tr>')
|
|
||||||
for v in row:
|
|
||||||
f.write('<td>%s</td>' % (v))
|
|
||||||
f.write('</tr>')
|
|
||||||
f.write('</table>')
|
|
||||||
f.write('</body></html>')
|
f.write('</body></html>')
|
||||||
|
|
80
iwla.py
80
iwla.py
|
@ -17,7 +17,7 @@ DISPLAY_ROOT = './output/'
|
||||||
|
|
||||||
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
||||||
'"$request" $status $body_bytes_sent ' +\
|
'"$request" $status $body_bytes_sent ' +\
|
||||||
'"$http_referer" "$http_user_agent"';
|
'"$http_referer" "$http_user_agent"'
|
||||||
|
|
||||||
time_format = '%d/%b/%Y:%H:%M:%S +0100'
|
time_format = '%d/%b/%Y:%H:%M:%S +0100'
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ current_visits = {}
|
||||||
cache_plugins = {}
|
cache_plugins = {}
|
||||||
display = {}
|
display = {}
|
||||||
|
|
||||||
log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format);
|
log_format_extracted = re.sub(r'([^\$\w])', r'\\\g<1>', log_format)
|
||||||
log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', log_format_extracted)
|
log_format_extracted = re.sub(r'\$(\w+)', '(?P<\g<1>>.+)', log_format_extracted)
|
||||||
http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
|
http_request_extracted = re.compile(r'(?P<http_method>\S+) (?P<http_uri>\S+) (?P<http_version>\S+)')
|
||||||
|
|
||||||
|
@ -57,11 +57,18 @@ ANALYSIS_CLASS = 'HTTP'
|
||||||
API_VERSION = 1
|
API_VERSION = 1
|
||||||
|
|
||||||
def preloadPlugins():
|
def preloadPlugins():
|
||||||
|
ret = True
|
||||||
for root in plugins.keys():
|
for root in plugins.keys():
|
||||||
for plugin_name in plugins[root]:
|
for plugin_name in plugins[root]:
|
||||||
p = root + '/' + plugin_name
|
p = root + '/' + plugin_name
|
||||||
try:
|
try:
|
||||||
mod = cache_plugins[p] = imp.load_source('hook', p)
|
fp, pathname, description = imp.find_module(plugin_name, [root])
|
||||||
|
cache_plugins[p] = imp.load_module(plugin_name, fp, pathname, description)
|
||||||
|
#cache_plugins[p] = imp.load_module(p,None,p,("py","r",imp.PKG_DIRECTORY))
|
||||||
|
#cache_plugins[p] = imp.load_source(p, p)
|
||||||
|
mod = cache_plugins[p]
|
||||||
|
#print dir(mod)
|
||||||
|
#print "Register %s -> %s" % (p, mod)
|
||||||
infos = mod.get_plugins_infos()
|
infos = mod.get_plugins_infos()
|
||||||
if infos['class'] != ANALYSIS_CLASS or \
|
if infos['class'] != ANALYSIS_CLASS or \
|
||||||
API_VERSION < infos['min_version'] or\
|
API_VERSION < infos['min_version'] or\
|
||||||
|
@ -71,8 +78,8 @@ def preloadPlugins():
|
||||||
del cache_plugins[p]
|
del cache_plugins[p]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print 'Error loading \'%s\' => %s' % (p, e)
|
print 'Error loading \'%s\' => %s' % (p, e)
|
||||||
return False
|
ret = False
|
||||||
return True
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def createEmptyVisits():
|
def createEmptyVisits():
|
||||||
|
@ -113,12 +120,12 @@ def deserialize(filename):
|
||||||
return pickle.load(f)
|
return pickle.load(f)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def callPlugins(root, *kwargs):
|
def callPlugins(root, *args):
|
||||||
print '==> Call plugins (%s)' % root
|
print '==> Call plugins (%s)' % root
|
||||||
for p in plugins[root]:
|
for p in plugins[root]:
|
||||||
print '\t%s' % (p)
|
print '\t%s' % (p)
|
||||||
mod = cache_plugins[root + '/' + p]
|
mod = cache_plugins[root + '/' + p]
|
||||||
mod.hook(*kwargs)
|
mod.hook(*args)
|
||||||
|
|
||||||
def isPage(request):
|
def isPage(request):
|
||||||
for e in pages_extensions:
|
for e in pages_extensions:
|
||||||
|
@ -135,8 +142,8 @@ def appendHit(hit):
|
||||||
return
|
return
|
||||||
|
|
||||||
super_hit = current_visits['visits'][remote_addr]
|
super_hit = current_visits['visits'][remote_addr]
|
||||||
super_hit['pages'].append(hit)
|
super_hit['requests'].append(hit)
|
||||||
super_hit['bandwith'] += int(hit['body_bytes_sent'])
|
super_hit['bandwidth'] += int(hit['body_bytes_sent'])
|
||||||
super_hit['last_access'] = meta_visit['last_time']
|
super_hit['last_access'] = meta_visit['last_time']
|
||||||
|
|
||||||
request = hit['extract_request']
|
request = hit['extract_request']
|
||||||
|
@ -167,15 +174,16 @@ def appendHit(hit):
|
||||||
|
|
||||||
def createUser(hit):
|
def createUser(hit):
|
||||||
super_hit = current_visits['visits'][hit['remote_addr']] = {}
|
super_hit = current_visits['visits'][hit['remote_addr']] = {}
|
||||||
super_hit['viewed_pages'] = 0;
|
super_hit['remote_addr'] = hit['remote_addr']
|
||||||
super_hit['viewed_hits'] = 0;
|
super_hit['viewed_pages'] = 0
|
||||||
super_hit['not_viewed_pages'] = 0;
|
super_hit['viewed_hits'] = 0
|
||||||
super_hit['not_viewed_hits'] = 0;
|
super_hit['not_viewed_pages'] = 0
|
||||||
super_hit['bandwith'] = 0;
|
super_hit['not_viewed_hits'] = 0
|
||||||
|
super_hit['bandwidth'] = 0
|
||||||
super_hit['last_access'] = meta_visit['last_time']
|
super_hit['last_access'] = meta_visit['last_time']
|
||||||
super_hit['pages'] = [];
|
super_hit['requests'] = []
|
||||||
super_hit['robot'] = False
|
super_hit['robot'] = False
|
||||||
super_hit['hit_only'] = 0;
|
super_hit['hit_only'] = 0
|
||||||
appendHit(hit)
|
appendHit(hit)
|
||||||
|
|
||||||
def decodeHTTPRequest(hit):
|
def decodeHTTPRequest(hit):
|
||||||
|
@ -185,7 +193,7 @@ def decodeHTTPRequest(hit):
|
||||||
|
|
||||||
if groups:
|
if groups:
|
||||||
hit['extract_request'] = groups.groupdict()
|
hit['extract_request'] = groups.groupdict()
|
||||||
uri_groups = uri_re.match(hit['extract_request']['http_uri']);
|
uri_groups = uri_re.match(hit['extract_request']['http_uri'])
|
||||||
if uri_groups:
|
if uri_groups:
|
||||||
d = uri_groups.groupdict()
|
d = uri_groups.groupdict()
|
||||||
hit['extract_request']['extract_uri'] = d['extract_uri']
|
hit['extract_request']['extract_uri'] = d['extract_uri']
|
||||||
|
@ -195,7 +203,7 @@ def decodeHTTPRequest(hit):
|
||||||
print "Bad request extraction " + hit['request']
|
print "Bad request extraction " + hit['request']
|
||||||
return False
|
return False
|
||||||
|
|
||||||
referer_groups = uri_re.match(hit['http_referer']);
|
referer_groups = uri_re.match(hit['http_referer'])
|
||||||
if referer_groups:
|
if referer_groups:
|
||||||
referer = hit['extract_referer'] = referer_groups.groupdict()
|
referer = hit['extract_referer'] = referer_groups.groupdict()
|
||||||
return True
|
return True
|
||||||
|
@ -205,13 +213,19 @@ def decodeTime(hit):
|
||||||
|
|
||||||
hit['time_decoded'] = time.strptime(t, time_format)
|
hit['time_decoded'] = time.strptime(t, time_format)
|
||||||
|
|
||||||
|
def getDisplayIndex():
    """Look up the index page for the month of meta_visit['last_time'].

    The key is '<year>/index_<month>.html', built from the tm_year and
    tm_mon fields of meta_visit['last_time']. Returns the matching entry
    of the module-level ``display`` mapping, or None when there is none.
    """
    last = meta_visit['last_time']
    return display.get('%d/index_%d.html' % (last.tm_year, last.tm_mon), None)
|
||||||
|
|
||||||
def generateDisplayDaysStat():
|
def generateDisplayDaysStat():
|
||||||
cur_time = meta_visit['last_time']
|
cur_time = meta_visit['last_time']
|
||||||
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
|
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
|
||||||
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
|
filename = '%d/index_%d.html' % (cur_time.tm_year, cur_time.tm_mon)
|
||||||
page = createPage(display, filename, title)
|
page = createPage(display, filename, title)
|
||||||
|
|
||||||
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwith', 'Robot Bandwith'])
|
days = createTable('By day', ['Day', 'Visits', 'Pages', 'Hits', 'Bandwidth', 'Robot Bandwidth'])
|
||||||
|
|
||||||
keys = current_visits['days_stats'].keys()
|
keys = current_visits['days_stats'].keys()
|
||||||
keys.sort()
|
keys.sort()
|
||||||
|
@ -243,7 +257,7 @@ def generateDisplayDaysStat():
|
||||||
def generateDisplay():
|
def generateDisplay():
|
||||||
generateDisplayDaysStat()
|
generateDisplayDaysStat()
|
||||||
callPlugins(DISPLAY_HOOK_DIRECTORY, current_visits, display)
|
callPlugins(DISPLAY_HOOK_DIRECTORY, current_visits, display)
|
||||||
buildPages()
|
buildPages(DISPLAY_ROOT, display)
|
||||||
|
|
||||||
def generateStats(visits):
|
def generateStats(visits):
|
||||||
stats = {}
|
stats = {}
|
||||||
|
@ -251,27 +265,27 @@ def generateStats(visits):
|
||||||
stats['not_viewed_bandwidth'] = 0
|
stats['not_viewed_bandwidth'] = 0
|
||||||
stats['viewed_pages'] = 0
|
stats['viewed_pages'] = 0
|
||||||
stats['viewed_hits'] = 0
|
stats['viewed_hits'] = 0
|
||||||
#stats['pages'] = set()
|
#stats['requests'] = set()
|
||||||
stats['nb_visitors'] = 0
|
stats['nb_visitors'] = 0
|
||||||
|
|
||||||
for k in visits.keys():
|
for k in visits.keys():
|
||||||
super_hit = visits[k]
|
super_hit = visits[k]
|
||||||
if super_hit['robot']:
|
if super_hit['robot']:
|
||||||
stats['not_viewed_bandwidth'] += super_hit['bandwith']
|
stats['not_viewed_bandwidth'] += super_hit['bandwidth']
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
|
#print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
|
||||||
|
|
||||||
if not super_hit['hit_only']:
|
if not super_hit['hit_only']:
|
||||||
stats['nb_visitors'] += 1
|
stats['nb_visitors'] += 1
|
||||||
stats['viewed_bandwidth'] += super_hit['bandwith']
|
stats['viewed_bandwidth'] += super_hit['bandwidth']
|
||||||
stats['viewed_pages'] += super_hit['viewed_pages']
|
stats['viewed_pages'] += super_hit['viewed_pages']
|
||||||
stats['viewed_hits'] += super_hit['viewed_hits']
|
stats['viewed_hits'] += super_hit['viewed_hits']
|
||||||
|
|
||||||
# for p in super_hit['pages']:
|
# for p in super_hit['requests']:
|
||||||
# if not p['is_page']: continue
|
# if not p['is_page']: continue
|
||||||
# req = p['extract_request']
|
# req = p['extract_request']
|
||||||
# stats['pages'].add(req['extract_uri'])
|
# stats['requests'].add(req['extract_uri'])
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
@ -287,7 +301,7 @@ def generateMonthStats():
|
||||||
print stats
|
print stats
|
||||||
|
|
||||||
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
|
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
|
||||||
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
|
callPlugins(POST_HOOK_DIRECTORY, valid_visitors, stats)
|
||||||
|
|
||||||
current_visits['month_stats'] = stats
|
current_visits['month_stats'] = stats
|
||||||
|
|
||||||
|
@ -348,7 +362,6 @@ def newHit(hit):
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
analyse_started = True
|
analyse_started = True
|
||||||
current_visits = deserialize(getDBFilename(t)) or createEmptyVisits()
|
|
||||||
if cur_time.tm_mon != t.tm_mon:
|
if cur_time.tm_mon != t.tm_mon:
|
||||||
generateMonthStats()
|
generateMonthStats()
|
||||||
current_visits = deserialize(getDBFilename(t)) or createEmptyVisits()
|
current_visits = deserialize(getDBFilename(t)) or createEmptyVisits()
|
||||||
|
@ -371,12 +384,14 @@ preloadPlugins()
|
||||||
print '==> Analysing log'
|
print '==> Analysing log'
|
||||||
|
|
||||||
meta_visit = deserialize(META_PATH) or createEmptyMeta()
|
meta_visit = deserialize(META_PATH) or createEmptyMeta()
|
||||||
|
if meta_visit['last_time']:
|
||||||
current_visits = createEmptyVisits()
|
current_visits = deserialize(getDBFilename(meta_visit['last_time'])) or createEmptyVisits()
|
||||||
|
else:
|
||||||
|
current_visits = createEmptyVisits()
|
||||||
|
|
||||||
f = open(analyzed_filename)
|
f = open(analyzed_filename)
|
||||||
for l in f:
|
for l in f:
|
||||||
# print "line " + l;
|
# print "line " + l
|
||||||
|
|
||||||
groups = log_re.match(l)
|
groups = log_re.match(l)
|
||||||
|
|
||||||
|
@ -385,7 +400,7 @@ for l in f:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print "No match " + l
|
print "No match " + l
|
||||||
f.close();
|
f.close()
|
||||||
|
|
||||||
if analyse_started:
|
if analyse_started:
|
||||||
generateDayStats()
|
generateDayStats()
|
||||||
|
@ -393,3 +408,4 @@ if analyse_started:
|
||||||
serialize(meta_visit, META_PATH)
|
serialize(meta_visit, META_PATH)
|
||||||
else:
|
else:
|
||||||
print '==> Analyse not started : nothing to do'
|
print '==> Analyse not started : nothing to do'
|
||||||
|
generateMonthStats()
|
||||||
|
|
|
@ -30,10 +30,12 @@ def hook(hits):
|
||||||
isRobot = False
|
isRobot = False
|
||||||
referers = 0
|
referers = 0
|
||||||
|
|
||||||
for r in awstats_robots:
|
first_page = super_hit['requests'][0]
|
||||||
if r.match(super_hit['pages'][0]['http_user_agent']):
|
if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
|
||||||
super_hit['robot'] = 1
|
for r in awstats_robots:
|
||||||
continue
|
if r.match(first_page['http_user_agent']):
|
||||||
|
super_hit['robot'] = 1
|
||||||
|
continue
|
||||||
|
|
||||||
# 1) no pages view --> robot
|
# 1) no pages view --> robot
|
||||||
if not super_hit['viewed_pages']:
|
if not super_hit['viewed_pages']:
|
||||||
|
@ -45,7 +47,7 @@ def hook(hits):
|
||||||
super_hit['robot'] = 1
|
super_hit['robot'] = 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for hit in super_hit['pages']:
|
for hit in super_hit['requests']:
|
||||||
# 3) /robots.txt read
|
# 3) /robots.txt read
|
||||||
if hit['extract_request']['http_uri'] == '/robots.txt':
|
if hit['extract_request']['http_uri'] == '/robots.txt':
|
||||||
isRobot = True
|
isRobot = True
|
||||||
|
|
|
@ -7,9 +7,11 @@ PLUGIN_CLASS = 'HTTP'
|
||||||
API_VERSION = 1
|
API_VERSION = 1
|
||||||
|
|
||||||
def get_plugins_infos():
|
def get_plugins_infos():
|
||||||
infos = {'class' : PLUGIN_CLASS,
|
infos = {
|
||||||
'min_version' : API_VERSION,
|
'class' : PLUGIN_CLASS,
|
||||||
'max_version' : -1}
|
'min_version' : API_VERSION,
|
||||||
|
'max_version' : -1
|
||||||
|
}
|
||||||
return infos
|
return infos
|
||||||
|
|
||||||
def load():
|
def load():
|
||||||
|
@ -23,9 +25,10 @@ def hook(hits):
|
||||||
|
|
||||||
if super_hit['robot']: continue
|
if super_hit['robot']: continue
|
||||||
|
|
||||||
for p in super_hit['pages']:
|
for p in super_hit['requests']:
|
||||||
if not p['is_page']: continue
|
if not p['is_page']: continue
|
||||||
if int(p['status']) != 200: continue
|
if int(p['status']) != 200: continue
|
||||||
|
if p['time_decoded'].tm_mday != super_hit['last_access'].tm_mday: continue
|
||||||
if logo_re.match(p['extract_request']['extract_uri']):
|
if logo_re.match(p['extract_request']['extract_uri']):
|
||||||
p['is_page'] = False
|
p['is_page'] = False
|
||||||
super_hit['viewed_pages'] -= 1
|
super_hit['viewed_pages'] -= 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user