Need to separate day and month stats

This commit is contained in:
Grégory Soutadé 2014-11-19 21:37:37 +01:00
parent 53452fa4c3
commit b8027fe509
3 changed files with 74 additions and 30 deletions

View File

@ -15,5 +15,6 @@ def hook(hits):
if not p['is_page']: continue
if logo_re.match(p['extract_request']['extract_uri']):
p['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
if super_hit['viewed_pages']:
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1

99
iwla.py
View File

@ -15,6 +15,7 @@ print '==> Start'
meta_visit = {'last_time':None}
analyse_started = False
current_visits = {}
cache_plugins = {}
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
'"$request" $status $body_bytes_sent ' +\
@ -46,6 +47,10 @@ def createEmptyVisits():
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
return visits
def createEmptyMeta():
    """Return a fresh metadata dict whose 'last_time' entry is None."""
    return {'last_time': None}
def getDBFilename(time):
    """Build the database path for the month that `time` falls in.

    `time` only needs `tm_year` and `tm_mon` attributes. The result is
    DB_ROOT followed by '<year>/<month>_<DB_FILENAME>'.
    """
    template = DB_ROOT + '%d/%d_%s'
    return template % (time.tm_year, time.tm_mon, DB_FILENAME)
@ -69,16 +74,17 @@ def deserialize(filename):
return pickle.load(f)
return None
def createEmptyVisits():
    """Return an empty visits database.

    The result holds three empty dicts: 'days_stats', 'month_stats' and
    'visits'. A body of `pass` would return None, and callers index the
    result with ['visits'] right away, so the structure must be built here.
    """
    return {'days_stats': {}, 'month_stats': {}, 'visits': {}}
def callPlugins(path, *kwargs):
    """Call the `hook` function of every plugin file matching `path`.

    `path` is a glob pattern; matching files are processed in sorted order.
    The extra positional arguments are forwarded unchanged to each hook.
    Loaded modules are kept in `cache_plugins`, keyed by file path, so a
    plugin file is imported only the first time it is seen.
    """
    print('==> Call plugins (%s)' % path)
    for p in sorted(glob.glob(path)):
        print('\t%s' % (p))
        # Load only on a cache miss: an unconditional load here would
        # re-import the plugin on every call and make the cache pointless.
        if p not in cache_plugins:
            cache_plugins[p] = imp.load_source('hook', p)
        cache_plugins[p].hook(*kwargs)
def isPage(request):
@ -89,7 +95,7 @@ def isPage(request):
return False
def appendHit(hit):
super_hit = current_visits[hit['remote_addr']]
super_hit = current_visits['visits'][hit['remote_addr']]
super_hit['pages'].append(hit)
super_hit['bandwith'] += int(hit['body_bytes_sent'])
@ -102,8 +108,9 @@ def appendHit(hit):
hit['is_page'] = isPage(uri)
# Don't count redirect status
if int(hit['status']) == 302: return
# Don't count 3xx status
status = int(hit['status'])
if status >= 300 and status < 400: return
if super_hit['robot'] or\
not int(hit['status']) in viewed_http_codes:
@ -119,7 +126,7 @@ def appendHit(hit):
super_hit[hit_key] += 1
def createUser(hit):
super_hit = current_visits[hit['remote_addr']] = {}
super_hit = current_visits['visits'][hit['remote_addr']] = {}
super_hit['viewed_pages'] = 0;
super_hit['viewed_hits'] = 0;
super_hit['not_viewed_pages'] = 0;
@ -163,40 +170,49 @@ def decodeTime(hit):
hit['time_decoded'] = time.strptime(t, time_format)
def generateMonthStats():
callPlugins(PRE_HOOK_DIRECTORY, current_visits)
valid_visitors = {k: v for (k,v) in current_visits.items() if not current_visits[k]['robot']}
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
def generateStats(visits):
    """Aggregate per-visitor counters into a single statistics dict.

    `visits` maps a visitor key to a dict providing 'robot', 'bandwith',
    'viewed_pages' and 'viewed_hits'. Robots only contribute their
    bandwidth to 'not_viewed_bandwidth'; every other visitor is counted in
    'nb_visitors' and added to the viewed_* totals. One summary line is
    printed per non-robot visitor.

    Only the `visits` argument is read, never the global visit table, so
    the same function can serve any subset of visitors.
    """
    stats = {
        'viewed_bandwidth': 0,
        'not_viewed_bandwidth': 0,
        'viewed_pages': 0,
        'viewed_hits': 0,
        'nb_visitors': 0,
    }
    # NOTE: collecting the set of viewed page URIs (a 'pages' entry) is
    # disabled in this version, so no such key is produced.

    for k, super_hit in visits.items():
        if super_hit['robot']:
            stats['not_viewed_bandwidth'] += super_hit['bandwith']
            continue

        print("[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits']))

        stats['nb_visitors'] += 1
        stats['viewed_bandwidth'] += super_hit['bandwith']
        stats['viewed_pages'] += super_hit['viewed_pages']
        stats['viewed_hits'] += super_hit['viewed_hits']

    return stats
def generateMonthStats():
visits = current_visits['visits']
stats = generateStats(visits)
cur_time = meta_visit['last_time']
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
print stats
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
current_visits['month_stats'] = stats
path = getDBFilename(cur_time)
if os.path.exists(path):
os.remove(path)
@ -205,6 +221,29 @@ def generateMonthStats():
serialize(current_visits, path)
def generateDayStats():
    """Compute and store the statistics of the day that just ended.

    Runs the pre-hook plugins on the current visits, aggregates them with
    generateStats(), prints the result and stores it in
    current_visits['days_stats'] under the day-of-month of
    meta_visit['last_time'].

    generateStats() sums over every visitor held in
    current_visits['visits']. That table is only replaced when the month
    changes, so the totals are month-to-date. The per-day figure is
    therefore the total minus ALL previously stored days of the month;
    subtracting only the nearest previous day would be wrong from the
    third recorded day on, because stored entries are per-day values.
    """
    visits = current_visits['visits']

    callPlugins(PRE_HOOK_DIRECTORY, visits)

    stats = generateStats(visits)

    cur_time = meta_visit['last_time']
    print("== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))

    days_stats = current_visits['days_stats']
    for day, previous in days_stats.items():
        # Only earlier days count; an existing entry for the current day
        # (e.g. from a re-run) is simply overwritten below.
        if day >= cur_time.tm_mday:
            continue
        for k in stats:
            stats[k] -= previous[k]

    print(stats)

    days_stats[cur_time.tm_mday] = stats
def newHit(hit):
global current_visits
global analyse_started
@ -217,7 +256,7 @@ def newHit(hit):
if cur_time == None:
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
analyse_started = True
else:
if not analyse_started:
@ -226,11 +265,13 @@ def newHit(hit):
else:
analyse_started = True
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
if cur_time.tm_mon != t.tm_mon:
generateMonthStats()
current_visits = deserialize(getDBFilename(t))
if not current_visits: current_visits = {}
if not current_visits: current_visits = createEmptyVisits()
elif cur_time.tm_mday != t.tm_mday:
generateDayStats()
meta_visit['last_time'] = t
@ -240,7 +281,7 @@ def newHit(hit):
if hit[k] == '-': hit[k] = ''
remote_addr = hit['remote_addr']
if remote_addr in current_visits.keys():
if remote_addr in current_visits['visits'].keys():
appendHit(hit)
else:
createUser(hit)
@ -251,7 +292,9 @@ print '==> Analysing log'
meta_visit = deserialize(META_PATH)
if not meta_visit:
meta_visit = {'last_time':None}
meta_visit = createEmptyMeta()
current_visits = createEmptyVisits()
f = open("access.log")
for l in f: