Add option count_hit_only_visitors and function isValidForCurrentAnalysis()
This commit is contained in:
parent
6b0ed18f35
commit
dd8349ab08
5
conf.py
5
conf.py
|
@ -22,6 +22,5 @@ display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages']
|
|||
|
||||
reverse_dns_timeout = 0.2
|
||||
page_to_hit_conf = [r'^.+/logo/$']
|
||||
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
|
||||
# post_analysis_hooks = ['top_visitors.py']
|
||||
# display_hooks = ['top_visitors.py']
|
||||
|
||||
count_hit_only_visitors = False
|
||||
|
|
|
@ -22,3 +22,5 @@ display_hooks = []
|
|||
|
||||
pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
|
||||
viewed_http_codes = [200, 304]
|
||||
|
||||
count_hit_only_visitors = True
|
||||
|
|
32
iwla.py
32
iwla.py
|
@ -75,6 +75,10 @@ class IWLA(object):
|
|||
# Return the value stored under meta_infos['start_analysis_time'].
# NOTE(review): presumably a time.struct_time, since isValidForCurrentAnalysis()
# passes the same value to time.mktime() -- confirm where it is assigned.
def getStartAnalysisTime(self):
|
||||
return self.meta_infos['start_analysis_time']
|
||||
|
||||
# Tell whether a request belongs to the analysis currently running.
#
# request: mapping with a 'time_decoded' entry accepted by time.mktime()
#          (presumably a time.struct_time -- confirm against the log parser).
# Returns True only when the request time is strictly later than
# meta_infos['start_analysis_time'].
# NOTE(review): the comparison is strict, so a request stamped exactly at the
# start time is rejected; the plugin checks this helper replaces skipped only
# requests strictly older than the start time -- confirm the boundary is intended.
def isValidForCurrentAnalysis(self, request):
|
||||
cur_time = self.meta_infos['start_analysis_time']
|
||||
return (time.mktime(cur_time) < time.mktime(request['time_decoded']))
|
||||
|
||||
def _clearMeta(self):
|
||||
self.meta_infos = {
|
||||
'last_time' : None
|
||||
|
@ -264,15 +268,15 @@ class IWLA(object):
|
|||
#stats['requests'] = set()
|
||||
stats['nb_visitors'] = 0
|
||||
|
||||
for k in visits.keys():
|
||||
super_hit = visits[k]
|
||||
for (k, super_hit) in visits.items():
|
||||
if super_hit['robot']:
|
||||
stats['not_viewed_bandwidth'] += super_hit['bandwidth']
|
||||
continue
|
||||
|
||||
#print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits'])
|
||||
|
||||
if not super_hit['hit_only']:
|
||||
if conf.count_hit_only_visitors or\
|
||||
super_hit['viewed_pages']:
|
||||
stats['nb_visitors'] += 1
|
||||
stats['viewed_bandwidth'] += super_hit['bandwidth']
|
||||
stats['viewed_pages'] += super_hit['viewed_pages']
|
||||
|
@ -298,7 +302,14 @@ class IWLA(object):
|
|||
|
||||
self.current_analysis['month_stats'] = stats
|
||||
|
||||
self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']}
|
||||
# Build the set of visitors exposed to the post-analysis and display hooks.
# Robots are always excluded. Hit-only visitors (no viewed page) are kept
# only when conf.count_hit_only_visitors is enabled, which is the same rule
# used when nb_visitors is computed for the month statistics.
# Fix: the previous test ("count_hit_only_visitors and not viewed_pages")
# was inverted and dropped hit-only visitors exactly when the option asked
# for them to be counted.
self.valid_visitors = {}
for (k, v) in visits.items():
    if v['robot']:
        continue
    if not (conf.count_hit_only_visitors or v['viewed_pages']):
        continue
    self.valid_visitors[k] = v
|
||||
|
||||
self._callPlugins(conf.POST_HOOK_DIRECTORY)
|
||||
|
||||
path = self.getDBFilename(cur_time)
|
||||
|
@ -331,9 +342,12 @@ class IWLA(object):
|
|||
for k in stats.keys():
|
||||
stats[k] -= self.current_analysis['days_stats'][last_day][k]
|
||||
stats['nb_visitors'] = 0
|
||||
for k in visits.keys():
|
||||
if visits[k]['robot']: continue
|
||||
if visits[k]['last_access'].tm_mday == cur_time.tm_mday:
|
||||
# Recount the visitors of the current day. Robots never count. Hit-only
# visitors (no viewed page) count only when conf.count_hit_only_visitors is
# enabled, matching the rule applied to the month statistics.
# Fix: the previous test ("count_hit_only_visitors and not viewed_pages")
# was inverted and skipped hit-only visitors when the option was enabled.
for v in visits.values():
    if v['robot']:
        continue
    if not (conf.count_hit_only_visitors or v['viewed_pages']):
        continue
    # Only visitors last seen on the day being closed are counted.
    if v['last_access'].tm_mday == cur_time.tm_mday:
        stats['nb_visitors'] += 1
|
||||
print stats
|
||||
|
||||
|
@ -349,7 +363,7 @@ class IWLA(object):
|
|||
self.analyse_started = True
|
||||
else:
|
||||
if not self.analyse_started:
|
||||
if time.mktime(cur_time) >= time.mktime(t):
|
||||
if not self.isValidForCurrentAnalysis(hit):
|
||||
return False
|
||||
else:
|
||||
self.analyse_started = True
|
||||
|
@ -374,7 +388,7 @@ class IWLA(object):
|
|||
return True
|
||||
|
||||
def start(self):
|
||||
print '==> Analyse previous database'
|
||||
print '==> Load previous database'
|
||||
|
||||
self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
|
||||
if self.meta_infos['last_time']:
|
||||
|
|
|
@ -11,6 +11,8 @@ class IWLADisplayAllVisits(IPlugin):
|
|||
|
||||
def hook(self):
|
||||
hits = self.iwla.getValidVisitors()
|
||||
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
|
||||
|
||||
last_access = sorted(hits.values(), key=lambda t: t['last_access'], reverse=True)
|
||||
|
||||
cur_time = self.iwla.getCurTime()
|
||||
|
@ -23,7 +25,7 @@ class IWLADisplayAllVisits(IPlugin):
|
|||
table = DisplayHTMLBlockTable('Last seen', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
|
||||
for super_hit in last_access:
|
||||
address = super_hit['remote_addr']
|
||||
if self.iwla.getConfValue('display_visitor_ip', False) and\
|
||||
if display_visitor_ip and\
|
||||
super_hit.get('dns_name_replaced', False):
|
||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
||||
|
||||
|
|
|
@ -91,7 +91,6 @@ class IWLADisplayReferers(IPlugin):
|
|||
index.appendBlock(table)
|
||||
|
||||
# All key phrases in a file
|
||||
cur_time = self.iwla.getCurTime()
|
||||
title = time.strftime('Key Phrases - %B %Y', cur_time)
|
||||
|
||||
filename = 'key_phrases_%d.html' % (cur_time.tm_mon)
|
||||
|
|
|
@ -11,8 +11,11 @@ class IWLADisplayTopVisitors(IPlugin):
|
|||
|
||||
def hook(self):
|
||||
hits = self.iwla.getValidVisitors()
|
||||
count_hit_only = self.iwla.getConfValue('count_hit_only_visitors', False)
|
||||
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
|
||||
|
||||
top_bandwidth = [(k,hits[k]['bandwidth']) for k in hits.keys()]
|
||||
top_bandwidth = [(k,v['bandwidth']) for (k,v) in hits.items() \
|
||||
if count_hit_only or v['viewed_pages']]
|
||||
top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True)
|
||||
top_visitors = [hits[h[0]] for h in top_bandwidth[:10]]
|
||||
|
||||
|
@ -20,7 +23,7 @@ class IWLADisplayTopVisitors(IPlugin):
|
|||
table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
|
||||
for super_hit in top_visitors:
|
||||
address = super_hit['remote_addr']
|
||||
if self.iwla.getConfValue('display_visitor_ip', False) and\
|
||||
if display_visitor_ip and\
|
||||
super_hit.get('dns_name_replaced', False):
|
||||
address = '%s [%s]' % (address, super_hit['remote_ip'])
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import time
|
||||
import re
|
||||
import xml.sax.saxutils as saxutils
|
||||
|
||||
|
@ -66,8 +65,6 @@ class IWLAPostAnalysisReferers(IPlugin):
|
|||
break
|
||||
|
||||
def hook(self):
|
||||
start_time = self.iwla.getStartAnalysisTime()
|
||||
start_time = time.mktime(start_time)
|
||||
stats = self.iwla.getCurrentVisists()
|
||||
month_stats = self.iwla.getMonthStats()
|
||||
|
||||
|
@ -78,7 +75,7 @@ class IWLAPostAnalysisReferers(IPlugin):
|
|||
|
||||
for (k, super_hit) in stats.items():
|
||||
for r in super_hit['requests']:
|
||||
if time.mktime(r['time_decoded']) < start_time: continue
|
||||
if not self.iwla.isValidForCurrentAnalysis(r): continue
|
||||
if not r['http_referer']: continue
|
||||
|
||||
uri = r['extract_referer']['extract_uri']
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import time
|
||||
import re
|
||||
|
||||
from iwla import IWLA
|
||||
|
@ -14,9 +13,6 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
|||
return True
|
||||
|
||||
def hook(self):
|
||||
start_time = self.iwla.getStartAnalysisTime()
|
||||
start_time = time.mktime(start_time)
|
||||
|
||||
stats = self.iwla.getCurrentVisists()
|
||||
month_stats = self.iwla.getMonthStats()
|
||||
|
||||
|
@ -27,7 +23,7 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
|||
for r in super_hit['requests']:
|
||||
if not r['is_page']: continue
|
||||
|
||||
if time.mktime(r['time_decoded']) < start_time: continue
|
||||
if not self.iwla.isValidForCurrentAnalysis(r): continue
|
||||
|
||||
uri = r['extract_request']['extract_uri']
|
||||
if self.index_re.match(uri):
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import re
|
||||
import time
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
|||
return True
|
||||
|
||||
def hook(self):
|
||||
start_time = self.iwla.getStartAnalysisTime()
|
||||
start_time = time.mktime(start_time)
|
||||
|
||||
hits = self.iwla.getCurrentVisists()
|
||||
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304])
|
||||
for (k, super_hit) in hits.items():
|
||||
if super_hit['robot']: continue
|
||||
|
||||
for p in super_hit['requests']:
|
||||
if not p['is_page']: continue
|
||||
if time.mktime(p['time_decoded']) < start_time: continue
|
||||
uri = p['extract_request']['extract_uri']
|
||||
for r in self.regexps:
|
||||
if r.match(uri):
|
||||
p['is_page'] = False
|
||||
for request in super_hit['requests']:
|
||||
if not request['is_page']: continue
|
||||
if not self.iwla.isValidForCurrentAnalysis(request): continue
|
||||
uri = request['extract_request']['extract_uri']
|
||||
for regexp in self.regexps:
|
||||
if regexp.match(uri):
|
||||
request['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
break
|
||||
|
|
|
@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin):
|
|||
# Basic rule to detect robots
|
||||
def hook(self):
|
||||
hits = self.iwla.getCurrentVisists()
|
||||
for k in hits.keys():
|
||||
super_hit = hits[k]
|
||||
|
||||
for (k, super_hit) in hits.items():
|
||||
if super_hit['robot']: continue
|
||||
|
||||
isRobot = False
|
||||
referers = 0
|
||||
|
||||
first_page = super_hit['requests'][0]
|
||||
if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
|
||||
for r in self.awstats_robots:
|
||||
if r.match(first_page['http_user_agent']):
|
||||
isRobot = True
|
||||
break
|
||||
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
|
||||
|
||||
if isRobot:
|
||||
super_hit['robot'] = 1
|
||||
continue
|
||||
for r in self.awstats_robots:
|
||||
if r.match(first_page['http_user_agent']):
|
||||
isRobot = True
|
||||
break
|
||||
|
||||
if isRobot:
|
||||
super_hit['robot'] = 1
|
||||
continue
|
||||
|
||||
# 1) no pages view --> robot
|
||||
# if not super_hit['viewed_pages']:
|
||||
|
|
Loading…
Reference in New Issue
Block a user