Merge branch 'master' of soutade.fr:iwla
Conflicts: conf.py
This commit is contained in:
commit
bd31b04e9b
14
TODO
Normal file
14
TODO
Normal file
|
@ -0,0 +1,14 @@
|
|||
reverse analysis
|
||||
-f option to read a file instead of the one in conf
|
||||
Add an "Others" entry when page lists are truncated
|
||||
translations
|
||||
doc auto generation
|
||||
doc enhancement
|
||||
Limit hits/pages/downloads by rate
|
||||
Automatic tests
|
||||
Test separate directory for DB and display
|
||||
quiet mode
|
||||
Add 0 before month when < 10
|
||||
Add Licence
|
||||
Free memory as soon as possible
|
||||
Bug in bandwidth accounting (x10)
|
90
iwla.py
90
iwla.py
|
@ -174,7 +174,7 @@ class IWLA(object):
|
|||
|
||||
def getCurDisplayPath(self, filename):
|
||||
cur_time = self.meta_infos['last_time']
|
||||
return os.path.join(str(cur_time.tm_year), str(cur_time.tm_mon), filename)
|
||||
return os.path.join(str(cur_time.tm_year), '%02d' % (cur_time.tm_mon), filename)
|
||||
|
||||
def getResourcesPath(self):
|
||||
return conf.resources_path
|
||||
|
@ -194,7 +194,7 @@ class IWLA(object):
|
|||
return self.display
|
||||
|
||||
def getDBFilename(self, time):
|
||||
return os.path.join(conf.DB_ROOT, str(time.tm_year), str(time.tm_mon), conf.DB_FILENAME)
|
||||
return os.path.join(conf.DB_ROOT, str(time.tm_year), '%02d' % (time.tm_mon), conf.DB_FILENAME)
|
||||
|
||||
def _serialize(self, obj, filename):
|
||||
base = os.path.dirname(filename)
|
||||
|
@ -255,12 +255,8 @@ class IWLA(object):
|
|||
|
||||
hit['is_page'] = self.isPage(uri)
|
||||
|
||||
status = int(hit['status'])
|
||||
if status not in conf.viewed_http_codes:
|
||||
return
|
||||
|
||||
if super_hit['robot'] or\
|
||||
not status in conf.viewed_http_codes:
|
||||
not self.hasBeenViewed(hit):
|
||||
page_key = 'not_viewed_pages'
|
||||
hit_key = 'not_viewed_hits'
|
||||
else:
|
||||
|
@ -336,7 +332,7 @@ class IWLA(object):
|
|||
|
||||
def _generateDisplayDaysStats(self):
|
||||
cur_time = self.meta_infos['last_time']
|
||||
title = 'Stats %d/%d' % (cur_time.tm_mon, cur_time.tm_year)
|
||||
title = 'Stats %d/%02d' % (cur_time.tm_year, cur_time.tm_mon)
|
||||
filename = self.getCurDisplayPath('index.html')
|
||||
print '==> Generate display (%s)' % (filename)
|
||||
page = self.display.createPage(title, filename, conf.css_path)
|
||||
|
@ -405,7 +401,7 @@ class IWLA(object):
|
|||
full_month = '%s %d' % (months_name[i], year)
|
||||
if i in month_stats.keys():
|
||||
stats = month_stats[i]
|
||||
link = '<a href="%d/%d/index.html">Details</a>' % (year, i)
|
||||
link = '<a href="%d/%02d/index.html">Details</a>' % (year, i)
|
||||
row = [full_month, stats['nb_visitors'], stats['viewed_pages'], stats['viewed_hits'],
|
||||
stats['viewed_bandwidth'], stats['not_viewed_bandwidth'], link]
|
||||
for j in graph_cols:
|
||||
|
@ -449,34 +445,14 @@ class IWLA(object):
|
|||
self._generateDisplayWholeMonthStats()
|
||||
self.display.build(conf.DISPLAY_ROOT)
|
||||
|
||||
def _generateStats(self, visits):
|
||||
def _createEmptyStats(self):
|
||||
stats = {}
|
||||
stats['viewed_bandwidth'] = 0
|
||||
stats['not_viewed_bandwidth'] = 0
|
||||
stats['viewed_pages'] = 0
|
||||
stats['viewed_hits'] = 0
|
||||
#stats['requests'] = set()
|
||||
stats['nb_visitors'] = 0
|
||||
|
||||
for (k, super_hit) in visits.items():
|
||||
if super_hit['robot']:
|
||||
stats['not_viewed_bandwidth'] += super_hit['bandwidth']
|
||||
continue
|
||||
|
||||
#print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
|
||||
|
||||
if conf.count_hit_only_visitors or\
|
||||
super_hit['viewed_pages']:
|
||||
stats['nb_visitors'] += 1
|
||||
stats['viewed_bandwidth'] += super_hit['bandwidth']
|
||||
stats['viewed_pages'] += super_hit['viewed_pages']
|
||||
stats['viewed_hits'] += super_hit['viewed_hits']
|
||||
|
||||
# for p in super_hit['requests']:
|
||||
# if not p['is_page']: continue
|
||||
# req = p['extract_request']
|
||||
# stats['requests'].add(req['extract_uri'])
|
||||
|
||||
return stats
|
||||
|
||||
def _generateMonthStats(self):
|
||||
|
@ -484,11 +460,15 @@ class IWLA(object):
|
|||
|
||||
visits = self.current_analysis['visits']
|
||||
|
||||
stats = self._generateStats(visits)
|
||||
stats = self._createEmptyStats()
|
||||
for (day, stat) in self.current_analysis['days_stats'].items():
|
||||
for k in stats.keys():
|
||||
stats[k] += stat[k]
|
||||
|
||||
duplicated_stats = {k:v for (k,v) in stats.items()}
|
||||
|
||||
cur_time = self.meta_infos['last_time']
|
||||
print "== Stats for %d/%d ==" % (cur_time.tm_year, cur_time.tm_mon)
|
||||
print "== Stats for %d/%02d ==" % (cur_time.tm_year, cur_time.tm_mon)
|
||||
print stats
|
||||
|
||||
if not 'month_stats' in self.current_analysis.keys():
|
||||
|
@ -514,7 +494,6 @@ class IWLA(object):
|
|||
os.remove(path)
|
||||
|
||||
print "==> Serialize to %s" % path
|
||||
|
||||
self._serialize(self.current_analysis, path)
|
||||
|
||||
# Save month stats
|
||||
|
@ -530,31 +509,35 @@ class IWLA(object):
|
|||
|
||||
def _generateDayStats(self):
|
||||
visits = self.current_analysis['visits']
|
||||
cur_time = self.meta_infos['last_time']
|
||||
|
||||
self._callPlugins(conf.PRE_HOOK_DIRECTORY)
|
||||
|
||||
stats = self._generateStats(visits)
|
||||
stats = self._createEmptyStats()
|
||||
|
||||
cur_time = self.meta_infos['last_time']
|
||||
print "== Stats for %d/%d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
|
||||
|
||||
if cur_time.tm_mday > 1:
|
||||
last_day = cur_time.tm_mday - 1
|
||||
while last_day:
|
||||
if last_day in self.current_analysis['days_stats'].keys():
|
||||
break
|
||||
last_day -= 1
|
||||
if last_day:
|
||||
for k in stats.keys():
|
||||
stats[k] -= self.current_analysis['days_stats'][last_day][k]
|
||||
stats['nb_visitors'] = 0
|
||||
for (k,v) in visits.items():
|
||||
if v['robot']: continue
|
||||
if conf.count_hit_only_visitors and\
|
||||
(not v['viewed_pages']):
|
||||
for (k, super_hit) in visits.items():
|
||||
if super_hit['last_access'].tm_mday != cur_time.tm_mday:
|
||||
continue
|
||||
if v['last_access'].tm_mday == cur_time.tm_mday:
|
||||
viewed_page = False
|
||||
for hit in super_hit['requests'][::-1]:
|
||||
if hit['time_decoded'].tm_mday != cur_time.tm_mday:
|
||||
break
|
||||
if super_hit['robot'] or\
|
||||
not self.hasBeenViewed(hit):
|
||||
stats['not_viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
||||
continue
|
||||
stats['viewed_bandwidth'] += int(hit['body_bytes_sent'])
|
||||
if hit['is_page']:
|
||||
stats['viewed_pages'] += 1
|
||||
viewed_pages = True
|
||||
else:
|
||||
stats['viewed_hits'] += 1
|
||||
if (conf.count_hit_only_visitors or\
|
||||
viewed_pages):
|
||||
stats['nb_visitors'] += 1
|
||||
|
||||
print "== Stats for %d/%02d/%d ==" % (cur_time.tm_year, cur_time.tm_mon, cur_time.tm_mday)
|
||||
|
||||
print stats
|
||||
|
||||
self.current_analysis['days_stats'][cur_time.tm_mday] = stats
|
||||
|
@ -568,10 +551,9 @@ class IWLA(object):
|
|||
self.current_analysis = self._deserialize(self.getDBFilename(t)) or self._clearVisits()
|
||||
self.analyse_started = True
|
||||
else:
|
||||
if not self.analyse_started:
|
||||
if time.mktime(t) < time.mktime(cur_time):
|
||||
return False
|
||||
else:
|
||||
if not self.analyse_started:
|
||||
self.analyse_started = True
|
||||
if cur_time.tm_mon != t.tm_mon:
|
||||
self._generateMonthStats()
|
||||
|
|
|
@ -109,15 +109,14 @@ class IWLAPostAnalysisReferers(IPlugin):
|
|||
key_phrases = month_stats.get('key_phrases', {})
|
||||
|
||||
for (k, super_hit) in stats.items():
|
||||
for r in super_hit['requests']:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r): continue
|
||||
for r in super_hit['requests'][::-1]:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r): break
|
||||
if not r['http_referer']: continue
|
||||
|
||||
uri = r['extract_referer']['extract_uri']
|
||||
is_search_engine = False
|
||||
|
||||
if self.own_domain_re.match(uri): continue
|
||||
|
||||
is_search_engine = False
|
||||
for (name, engine) in self.search_engines.items():
|
||||
for (hashid, hashid_re) in engine['hashid']:
|
||||
if not hashid_re.match(uri): continue
|
||||
|
|
|
@ -46,14 +46,12 @@ class IWLAPostAnalysisTopDownloads(IPlugin):
|
|||
|
||||
for (k, super_hit) in stats.items():
|
||||
if super_hit['robot']: continue
|
||||
for r in super_hit['requests']:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
||||
not self.iwla.hasBeenViewed(r):
|
||||
for r in super_hit['requests'][::-1]:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||
break
|
||||
if not self.iwla.hasBeenViewed(r) or\
|
||||
r['is_page']:
|
||||
continue
|
||||
if r['is_page']: continue
|
||||
|
||||
|
||||
if not int(r['status']) in viewed_http_codes: continue
|
||||
|
||||
uri = r['extract_request']['extract_uri'].lower()
|
||||
|
||||
|
|
|
@ -40,15 +40,14 @@ class IWLAPostAnalysisTopHits(IPlugin):
|
|||
|
||||
for (k, super_hit) in stats.items():
|
||||
if super_hit['robot']: continue
|
||||
for r in super_hit['requests']:
|
||||
if r['is_page']: continue
|
||||
|
||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
||||
not self.iwla.hasBeenViewed(r):
|
||||
for r in super_hit['requests'][::-1]:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||
break
|
||||
if not self.iwla.hasBeenViewed(r) or\
|
||||
r['is_page']:
|
||||
continue
|
||||
|
||||
uri = r['extract_request']['extract_uri']
|
||||
|
||||
uri = r['extract_request']['extract_uri'].lower()
|
||||
uri = "%s%s" % (r.get('server_name', ''), uri)
|
||||
|
||||
if not uri in top_hits.keys():
|
||||
|
|
|
@ -46,11 +46,11 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
|||
|
||||
for (k, super_hit) in stats.items():
|
||||
if super_hit['robot']: continue
|
||||
for r in super_hit['requests']:
|
||||
if not r['is_page']: continue
|
||||
|
||||
if not self.iwla.isValidForCurrentAnalysis(r) or\
|
||||
not self.iwla.hasBeenViewed(r):
|
||||
for r in super_hit['requests'][::-1]:
|
||||
if not self.iwla.isValidForCurrentAnalysis(r):
|
||||
break
|
||||
if not self.iwla.hasBeenViewed(r) or\
|
||||
not r['is_page']:
|
||||
continue
|
||||
|
||||
uri = r['extract_request']['extract_uri']
|
||||
|
|
|
@ -54,9 +54,11 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
|||
for (k, super_hit) in hits.items():
|
||||
if super_hit['robot']: continue
|
||||
|
||||
for request in super_hit['requests']:
|
||||
if not self.iwla.isValidForCurrentAnalysis(request) or\
|
||||
not self.iwla.hasBeenViewed(request):
|
||||
for request in super_hit['requests'][::-1]:
|
||||
if not self.iwla.isValidForCurrentAnalysis(request):
|
||||
break
|
||||
|
||||
if not self.iwla.hasBeenViewed(request):
|
||||
continue
|
||||
|
||||
uri = request['extract_request']['extract_uri']
|
||||
|
|
Loading…
Reference in New Issue
Block a user