Need to separate day and month stats
This commit is contained in:
parent
53452fa4c3
commit
b8027fe509
|
@ -15,5 +15,6 @@ def hook(hits):
|
||||||
if not p['is_page']: continue
|
if not p['is_page']: continue
|
||||||
if logo_re.match(p['extract_request']['extract_uri']):
|
if logo_re.match(p['extract_request']['extract_uri']):
|
||||||
p['is_page'] = False
|
p['is_page'] = False
|
||||||
super_hit['viewed_pages'] -= 1
|
if super_hit['viewed_pages']:
|
||||||
super_hit['viewed_hits'] += 1
|
super_hit['viewed_pages'] -= 1
|
||||||
|
super_hit['viewed_hits'] += 1
|
99
iwla.py
99
iwla.py
|
@ -15,6 +15,7 @@ print '==> Start'
|
||||||
meta_visit = {'last_time':None}
|
meta_visit = {'last_time':None}
|
||||||
analyse_started = False
|
analyse_started = False
|
||||||
current_visits = {}
|
current_visits = {}
|
||||||
|
cache_plugins = {}
|
||||||
|
|
||||||
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
||||||
'"$request" $status $body_bytes_sent ' +\
|
'"$request" $status $body_bytes_sent ' +\
|
||||||
|
@ -46,6 +47,10 @@ def createEmptyVisits():
|
||||||
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
|
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
|
||||||
return visits
|
return visits
|
||||||
|
|
||||||
|
def createEmptyMeta():
    """Return a new meta dictionary whose 'last_time' entry is None."""
    return {'last_time': None}
|
||||||
|
|
||||||
def getDBFilename(time):
|
def getDBFilename(time):
|
||||||
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
|
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
|
||||||
|
|
||||||
|
@ -69,16 +74,17 @@ def deserialize(filename):
|
||||||
return pickle.load(f)
|
return pickle.load(f)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def createEmptyVisits():
|
|
||||||
pass
|
|
||||||
|
|
||||||
def callPlugins(path, *kwargs):
    """Run the hook() function of every plugin file matching a glob pattern.

    Plugins are called in sorted filename order. A plugin file is loaded
    only once; its module is then kept in the module-level cache_plugins
    dictionary (keyed by file path) and reused on later calls.

    path   -- glob pattern selecting the plugin source files.
    kwargs -- positional arguments forwarded unchanged to each hook().
    """
    print('==> Call plugins (%s)' % path)

    for p in sorted(glob.glob(path)):
        print('\t%s' % (p))

        mod = cache_plugins.get(p)
        if mod is None:
            # Each plugin needs its own module name: loading several files
            # under one shared name re-executes them into the same module
            # object, so every cached entry would end up calling the hook()
            # of the plugin that was loaded last.
            mod = imp.load_source('hook_%d' % len(cache_plugins), p)
            cache_plugins[p] = mod

        mod.hook(*kwargs)
|
||||||
|
|
||||||
def isPage(request):
|
def isPage(request):
|
||||||
|
@ -89,7 +95,7 @@ def isPage(request):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def appendHit(hit):
|
def appendHit(hit):
|
||||||
super_hit = current_visits[hit['remote_addr']]
|
super_hit = current_visits['visits'][hit['remote_addr']]
|
||||||
super_hit['pages'].append(hit)
|
super_hit['pages'].append(hit)
|
||||||
super_hit['bandwith'] += int(hit['body_bytes_sent'])
|
super_hit['bandwith'] += int(hit['body_bytes_sent'])
|
||||||
|
|
||||||
|
@ -102,8 +108,9 @@ def appendHit(hit):
|
||||||
|
|
||||||
hit['is_page'] = isPage(uri)
|
hit['is_page'] = isPage(uri)
|
||||||
|
|
||||||
# Don't count redirect status
|
# Don't count 3xx status
|
||||||
if int(hit['status']) == 302: return
|
status = int(hit['status'])
|
||||||
|
if status >= 300 and status < 400: return
|
||||||
|
|
||||||
if super_hit['robot'] or\
|
if super_hit['robot'] or\
|
||||||
not int(hit['status']) in viewed_http_codes:
|
not int(hit['status']) in viewed_http_codes:
|
||||||
|
@ -119,7 +126,7 @@ def appendHit(hit):
|
||||||
super_hit[hit_key] += 1
|
super_hit[hit_key] += 1
|
||||||
|
|
||||||
def createUser(hit):
|
def createUser(hit):
|
||||||
super_hit = current_visits[hit['remote_addr']] = {}
|
super_hit = current_visits['visits'][hit['remote_addr']] = {}
|
||||||
super_hit['viewed_pages'] = 0;
|
super_hit['viewed_pages'] = 0;
|
||||||
super_hit['viewed_hits'] = 0;
|
super_hit['viewed_hits'] = 0;
|
||||||
super_hit['not_viewed_pages'] = 0;
|
super_hit['not_viewed_pages'] = 0;
|
||||||
|
@ -163,40 +170,49 @@ def decodeTime(hit):
|
||||||
hit['time_decoded'] = time.strptime(t, time_format)
|
hit['time_decoded'] = time.strptime(t, time_format)
|
||||||
|
|
||||||
|
|
||||||
def generateMonthStats():
|
def generateStats(visits):
    """Aggregate per-visitor counters into a single statistics dictionary.

    visits -- dictionary mapping a visitor key to its "super hit" record.
              Each record must provide 'robot' and 'bandwith'; records of
              non-robot visitors must also provide 'viewed_pages' and
              'viewed_hits'.

    Returns a dictionary with the keys 'viewed_bandwidth',
    'not_viewed_bandwidth', 'viewed_pages', 'viewed_hits' and
    'nb_visitors'. Robots only contribute to 'not_viewed_bandwidth'.
    One summary line is printed for every non-robot visitor.
    """
    stats = {
        'viewed_bandwidth': 0,
        'not_viewed_bandwidth': 0,
        'viewed_pages': 0,
        'viewed_hits': 0,
        'nb_visitors': 0,
    }
    # NOTE: collection of the set of viewed URIs (stats['pages']) is
    # disabled; every value stored in stats is a plain number.

    for k, super_hit in visits.items():
        if super_hit['robot']:
            # 'bandwith' (sic) is the key used by the visit records.
            stats['not_viewed_bandwidth'] += super_hit['bandwith']
            continue

        print("[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits']))

        stats['nb_visitors'] += 1
        stats['viewed_bandwidth'] += super_hit['bandwith']
        stats['viewed_pages'] += super_hit['viewed_pages']
        stats['viewed_hits'] += super_hit['viewed_hits']

    return stats
|
||||||
|
|
||||||
|
def generateMonthStats():
|
||||||
|
visits = current_visits['visits']
|
||||||
|
|
||||||
|
stats = generateStats(visits)
|
||||||
|
|
||||||
cur_time = meta_visit['last_time']
|
cur_time = meta_visit['last_time']
|
||||||
|
|
||||||
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
|
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
|
||||||
print stats
|
print stats
|
||||||
|
|
||||||
|
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
|
||||||
|
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
|
||||||
|
|
||||||
|
current_visits['month_stats'] = stats
|
||||||
|
|
||||||
path = getDBFilename(cur_time)
|
path = getDBFilename(cur_time)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
|
@ -205,6 +221,29 @@ def generateMonthStats():
|
||||||
|
|
||||||
serialize(current_visits, path)
|
serialize(current_visits, path)
|
||||||
|
|
||||||
|
def generateDayStats():
    """Compute, print and record the statistics of the current day.

    The day is the one of meta_visit['last_time']. Pre-hook plugins are
    run on the stored visits, the visits are aggregated with
    generateStats(), and the result is stored in
    current_visits['days_stats'] under the day-of-month number.
    """
    visits = current_visits['visits']

    callPlugins(PRE_HOOK_DIRECTORY, visits)

    stats = generateStats(visits)

    cur_time = meta_visit['last_time']
    print("== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday))

    # newHit() only resets the visits on a month change, so the aggregated
    # values cover the month so far, while each stored entry holds the
    # figures of a single day. Every earlier day must therefore be removed,
    # not just the most recent one.
    days_stats = current_visits['days_stats']
    for day, recorded in days_stats.items():
        if day >= cur_time.tm_mday:
            continue
        for k in stats:
            stats[k] -= recorded.get(k, 0)

    print(stats)

    days_stats[cur_time.tm_mday] = stats
|
||||||
|
|
||||||
def newHit(hit):
|
def newHit(hit):
|
||||||
global current_visits
|
global current_visits
|
||||||
global analyse_started
|
global analyse_started
|
||||||
|
@ -217,7 +256,7 @@ def newHit(hit):
|
||||||
|
|
||||||
if cur_time == None:
|
if cur_time == None:
|
||||||
current_visits = deserialize(getDBFilename(t))
|
current_visits = deserialize(getDBFilename(t))
|
||||||
if not current_visits: current_visits = {}
|
if not current_visits: current_visits = createEmptyVisits()
|
||||||
analyse_started = True
|
analyse_started = True
|
||||||
else:
|
else:
|
||||||
if not analyse_started:
|
if not analyse_started:
|
||||||
|
@ -226,11 +265,13 @@ def newHit(hit):
|
||||||
else:
|
else:
|
||||||
analyse_started = True
|
analyse_started = True
|
||||||
current_visits = deserialize(getDBFilename(t))
|
current_visits = deserialize(getDBFilename(t))
|
||||||
if not current_visits: current_visits = {}
|
if not current_visits: current_visits = createEmptyVisits()
|
||||||
if cur_time.tm_mon != t.tm_mon:
|
if cur_time.tm_mon != t.tm_mon:
|
||||||
generateMonthStats()
|
generateMonthStats()
|
||||||
current_visits = deserialize(getDBFilename(t))
|
current_visits = deserialize(getDBFilename(t))
|
||||||
if not current_visits: current_visits = {}
|
if not current_visits: current_visits = createEmptyVisits()
|
||||||
|
elif cur_time.tm_mday != t.tm_mday:
|
||||||
|
generateDayStats()
|
||||||
|
|
||||||
meta_visit['last_time'] = t
|
meta_visit['last_time'] = t
|
||||||
|
|
||||||
|
@ -240,7 +281,7 @@ def newHit(hit):
|
||||||
if hit[k] == '-': hit[k] = ''
|
if hit[k] == '-': hit[k] = ''
|
||||||
|
|
||||||
remote_addr = hit['remote_addr']
|
remote_addr = hit['remote_addr']
|
||||||
if remote_addr in current_visits.keys():
|
if remote_addr in current_visits['visits'].keys():
|
||||||
appendHit(hit)
|
appendHit(hit)
|
||||||
else:
|
else:
|
||||||
createUser(hit)
|
createUser(hit)
|
||||||
|
@ -251,7 +292,9 @@ print '==> Analysing log'
|
||||||
|
|
||||||
meta_visit = deserialize(META_PATH)
|
meta_visit = deserialize(META_PATH)
|
||||||
if not meta_visit:
|
if not meta_visit:
|
||||||
meta_visit = {'last_time':None}
|
meta_visit = createEmptyMeta()
|
||||||
|
|
||||||
|
current_visits = createEmptyVisits()
|
||||||
|
|
||||||
f = open("access.log")
|
f = open("access.log")
|
||||||
for l in f:
|
for l in f:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user