Need to separate day and month stats
This commit is contained in:
parent
53452fa4c3
commit
b8027fe509
|
@ -15,5 +15,6 @@ def hook(hits):
|
|||
if not p['is_page']: continue
|
||||
if logo_re.match(p['extract_request']['extract_uri']):
|
||||
p['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
if super_hit['viewed_pages']:
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
99
iwla.py
99
iwla.py
|
@ -15,6 +15,7 @@ print '==> Start'
|
|||
meta_visit = {'last_time':None}
|
||||
analyse_started = False
|
||||
current_visits = {}
|
||||
cache_plugins = {}
|
||||
|
||||
log_format = '$server_name:$server_port $remote_addr - $remote_user [$time_local] ' +\
|
||||
'"$request" $status $body_bytes_sent ' +\
|
||||
|
@ -46,6 +47,10 @@ def createEmptyVisits():
|
|||
visits = {'days_stats' : {}, 'month_stats' : {}, 'visits' : {}}
|
||||
return visits
|
||||
|
||||
def createEmptyMeta():
|
||||
meta = {'last_time':None}
|
||||
return meta
|
||||
|
||||
def getDBFilename(time):
|
||||
return (DB_ROOT + '%d/%d_%s') % (time.tm_year, time.tm_mon, DB_FILENAME)
|
||||
|
||||
|
@ -69,16 +74,17 @@ def deserialize(filename):
|
|||
return pickle.load(f)
|
||||
return None
|
||||
|
||||
def createEmptyVisits():
|
||||
pass
|
||||
|
||||
def callPlugins(path, *kwargs):
|
||||
print '==> Call plugins (%s)' % path
|
||||
plugins = glob.glob(path)
|
||||
plugins.sort()
|
||||
for p in plugins:
|
||||
print '\t%s' % (p)
|
||||
mod = imp.load_source('hook', p)
|
||||
if not p in cache_plugins:
|
||||
mod = imp.load_source('hook', p)
|
||||
cache_plugins[p] = mod
|
||||
else:
|
||||
mod = cache_plugins[p]
|
||||
mod.hook(*kwargs)
|
||||
|
||||
def isPage(request):
|
||||
|
@ -89,7 +95,7 @@ def isPage(request):
|
|||
return False
|
||||
|
||||
def appendHit(hit):
|
||||
super_hit = current_visits[hit['remote_addr']]
|
||||
super_hit = current_visits['visits'][hit['remote_addr']]
|
||||
super_hit['pages'].append(hit)
|
||||
super_hit['bandwith'] += int(hit['body_bytes_sent'])
|
||||
|
||||
|
@ -102,8 +108,9 @@ def appendHit(hit):
|
|||
|
||||
hit['is_page'] = isPage(uri)
|
||||
|
||||
# Don't count redirect status
|
||||
if int(hit['status']) == 302: return
|
||||
# Don't count 3xx status
|
||||
status = int(hit['status'])
|
||||
if status >= 300 and status < 400: return
|
||||
|
||||
if super_hit['robot'] or\
|
||||
not int(hit['status']) in viewed_http_codes:
|
||||
|
@ -119,7 +126,7 @@ def appendHit(hit):
|
|||
super_hit[hit_key] += 1
|
||||
|
||||
def createUser(hit):
|
||||
super_hit = current_visits[hit['remote_addr']] = {}
|
||||
super_hit = current_visits['visits'][hit['remote_addr']] = {}
|
||||
super_hit['viewed_pages'] = 0;
|
||||
super_hit['viewed_hits'] = 0;
|
||||
super_hit['not_viewed_pages'] = 0;
|
||||
|
@ -163,40 +170,49 @@ def decodeTime(hit):
|
|||
hit['time_decoded'] = time.strptime(t, time_format)
|
||||
|
||||
|
||||
def generateMonthStats():
|
||||
callPlugins(PRE_HOOK_DIRECTORY, current_visits)
|
||||
|
||||
valid_visitors = {k: v for (k,v) in current_visits.items() if not current_visits[k]['robot']}
|
||||
|
||||
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
|
||||
|
||||
def generateStats(visits):
|
||||
stats = {}
|
||||
stats['viewed_bandwidth'] = 0
|
||||
stats['not_viewed_bandwidth'] = 0
|
||||
stats['viewed_pages'] = 0
|
||||
stats['viewed_hits'] = 0
|
||||
stats['pages'] = set()
|
||||
#stats['pages'] = set()
|
||||
stats['nb_visitors'] = 0
|
||||
|
||||
for k in current_visits.keys():
|
||||
super_hit = current_visits[k]
|
||||
for k in visits.keys():
|
||||
super_hit = visits[k]
|
||||
if super_hit['robot']:
|
||||
stats['not_viewed_bandwidth'] += super_hit['bandwith']
|
||||
continue
|
||||
|
||||
print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
|
||||
|
||||
stats['nb_visitors'] += 1
|
||||
stats['viewed_bandwidth'] += super_hit['bandwith']
|
||||
stats['viewed_pages'] += super_hit['viewed_pages']
|
||||
stats['viewed_hits'] += super_hit['viewed_hits']
|
||||
|
||||
for p in super_hit['pages']:
|
||||
if not p['is_page']: continue
|
||||
req = p['extract_request']
|
||||
stats['pages'].add(req['extract_uri'])
|
||||
# for p in super_hit['pages']:
|
||||
# if not p['is_page']: continue
|
||||
# req = p['extract_request']
|
||||
# stats['pages'].add(req['extract_uri'])
|
||||
|
||||
return stats
|
||||
|
||||
def generateMonthStats():
|
||||
visits = current_visits['visits']
|
||||
|
||||
stats = generateStats(visits)
|
||||
|
||||
cur_time = meta_visit['last_time']
|
||||
|
||||
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
|
||||
print stats
|
||||
|
||||
valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
|
||||
callPlugins(POST_HOOK_DIRECTORY, valid_visitors)
|
||||
|
||||
current_visits['month_stats'] = stats
|
||||
|
||||
path = getDBFilename(cur_time)
|
||||
if os.path.exists(path):
|
||||
os.remove(path)
|
||||
|
@ -205,6 +221,29 @@ def generateMonthStats():
|
|||
|
||||
serialize(current_visits, path)
|
||||
|
||||
def generateDayStats():
|
||||
visits = current_visits['visits']
|
||||
|
||||
callPlugins(PRE_HOOK_DIRECTORY, visits)
|
||||
|
||||
stats = generateStats(visits)
|
||||
|
||||
cur_time = meta_visit['last_time']
|
||||
print "== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
|
||||
|
||||
if cur_time.tm_mday > 1:
|
||||
last_day = cur_time.tm_mday - 1
|
||||
while last_day:
|
||||
if last_day in current_visits['days_stats'].keys():
|
||||
break
|
||||
last_day -= 1
|
||||
if last_day:
|
||||
for k in stats.keys():
|
||||
stats[k] -= current_visits['days_stats'][last_day][k]
|
||||
print stats
|
||||
|
||||
current_visits['days_stats'][cur_time.tm_mday] = stats
|
||||
|
||||
def newHit(hit):
|
||||
global current_visits
|
||||
global analyse_started
|
||||
|
@ -217,7 +256,7 @@ def newHit(hit):
|
|||
|
||||
if cur_time == None:
|
||||
current_visits = deserialize(getDBFilename(t))
|
||||
if not current_visits: current_visits = {}
|
||||
if not current_visits: current_visits = createEmptyVisits()
|
||||
analyse_started = True
|
||||
else:
|
||||
if not analyse_started:
|
||||
|
@ -226,11 +265,13 @@ def newHit(hit):
|
|||
else:
|
||||
analyse_started = True
|
||||
current_visits = deserialize(getDBFilename(t))
|
||||
if not current_visits: current_visits = {}
|
||||
if not current_visits: current_visits = createEmptyVisits()
|
||||
if cur_time.tm_mon != t.tm_mon:
|
||||
generateMonthStats()
|
||||
current_visits = deserialize(getDBFilename(t))
|
||||
if not current_visits: current_visits = {}
|
||||
if not current_visits: current_visits = createEmptyVisits()
|
||||
elif cur_time.tm_mday != t.tm_mday:
|
||||
generateDayStats()
|
||||
|
||||
meta_visit['last_time'] = t
|
||||
|
||||
|
@ -240,7 +281,7 @@ def newHit(hit):
|
|||
if hit[k] == '-': hit[k] = ''
|
||||
|
||||
remote_addr = hit['remote_addr']
|
||||
if remote_addr in current_visits.keys():
|
||||
if remote_addr in current_visits['visits'].keys():
|
||||
appendHit(hit)
|
||||
else:
|
||||
createUser(hit)
|
||||
|
@ -251,7 +292,9 @@ print '==> Analysing log'
|
|||
|
||||
meta_visit = deserialize(META_PATH)
|
||||
if not meta_visit:
|
||||
meta_visit = {'last_time':None}
|
||||
meta_visit = createEmptyMeta()
|
||||
|
||||
current_visits = createEmptyVisits()
|
||||
|
||||
f = open("access.log")
|
||||
for l in f:
|
||||
|
|
Loading…
Reference in New Issue
Block a user