Split referers plugin into post_analysis and display
Remove post_analysis top_visitors (done in display)
This commit is contained in:
parent
fec5e375e4
commit
f8a48a7144
2
conf.py
2
conf.py
|
@ -16,7 +16,7 @@ DB_ROOT = './output/'
|
|||
DISPLAY_ROOT = './output/'
|
||||
|
||||
pre_analysis_hooks = ['page_to_hit', 'robots']
|
||||
post_analysis_hooks = ['top_visitors']
|
||||
post_analysis_hooks = ['referers']
|
||||
# post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
||||
display_hooks = ['top_visitors', 'all_visits', 'referers']
|
||||
|
||||
|
|
|
@ -1,120 +1,21 @@
|
|||
import time
|
||||
import re
|
||||
import xml.sax.saxutils as saxutils
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
from display import *
|
||||
|
||||
import awstats_data
|
||||
|
||||
class IWLADisplayReferers(IPlugin):
|
||||
def __init__(self, iwla):
|
||||
super(IWLADisplayReferers, self).__init__(iwla)
|
||||
self.API_VERSION = 1
|
||||
|
||||
def _getSearchEngine(self, hashid):
|
||||
#print 'Look for %s' % engine
|
||||
for (k, e) in self.search_engines.items():
|
||||
for (h,h_re) in e['hashid']:
|
||||
if hashid == h:
|
||||
return k
|
||||
#print 'Not found %s' % (hashid)
|
||||
return None
|
||||
|
||||
def load(self):
|
||||
domain_name = self.iwla.getConfValue('domain_name', '')
|
||||
|
||||
if not domain_name:
|
||||
print 'domain_name required in conf'
|
||||
return False
|
||||
|
||||
self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
|
||||
self.search_engines = {}
|
||||
|
||||
for (hashid, name) in awstats_data.search_engines_hashid.items():
|
||||
hashid_re = re.compile(r'.*%s.*' % (hashid))
|
||||
if not name in self.search_engines.keys():
|
||||
self.search_engines[name] = {
|
||||
'hashid' : [(hashid, hashid_re)]
|
||||
}
|
||||
else:
|
||||
self.search_engines[name]['hashid'].append((hashid, hashid_re))
|
||||
#print 'Hashid %s => %s' % (name, hashid)
|
||||
|
||||
for (name, known_url) in awstats_data.search_engines_knwown_url.items():
|
||||
self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')
|
||||
|
||||
for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
|
||||
not_engine_re = re.compile(r'.*%s.*' % (not_engine))
|
||||
key = self._getSearchEngine(engine)
|
||||
if key:
|
||||
self.search_engines[key]['not_search_engine'] = not_engine_re
|
||||
|
||||
#self.html_parser = html.parser.HTMLParser()
|
||||
|
||||
return True
|
||||
|
||||
def _extractKeyPhrase(self, key_phrase_re, parameters, key_phrases):
|
||||
if not parameters or not key_phrase_re: return
|
||||
|
||||
for p in parameters.split('&'):
|
||||
groups = key_phrase_re.match(p)
|
||||
if groups:
|
||||
key_phrase = groups.groupdict()['key_phrase']
|
||||
key_phrase = key_phrase.replace('+', ' ').lower()
|
||||
key_phrase = saxutils.unescape(key_phrase)
|
||||
if not key_phrase in key_phrases.keys():
|
||||
key_phrases[key_phrase] = 1
|
||||
else:
|
||||
key_phrases[key_phrase] += 1
|
||||
break
|
||||
self.requires = ['IWLAPostAnalysisReferers']
|
||||
|
||||
def hook(self):
|
||||
stats = self.iwla.getCurrentVisists()
|
||||
referers = {}
|
||||
robots_referers = {}
|
||||
search_engine_referers = {}
|
||||
key_phrases = {}
|
||||
|
||||
for (k, super_hit) in stats.items():
|
||||
for r in super_hit['requests']:
|
||||
if not r['http_referer']: continue
|
||||
|
||||
uri = r['extract_referer']['extract_uri']
|
||||
is_search_engine = False
|
||||
|
||||
if self.own_domain_re.match(uri): continue
|
||||
|
||||
for (name, engine) in self.search_engines.items():
|
||||
for (hashid, hashid_re) in engine['hashid']:
|
||||
if not hashid_re.match(uri): continue
|
||||
|
||||
not_engine = engine.get('not_search_engine', None)
|
||||
# Try not engine
|
||||
if not_engine and not_engine.match(uri): break
|
||||
is_search_engine = True
|
||||
uri = name
|
||||
|
||||
parameters = r['extract_referer'].get('extract_parameters', None)
|
||||
key_phrase_re = engine.get('known_url', None)
|
||||
|
||||
self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
|
||||
break
|
||||
|
||||
if is_search_engine:
|
||||
dictionary = search_engine_referers
|
||||
elif super_hit['robot']:
|
||||
dictionary = robots_referers
|
||||
# print '%s => %s' % (uri, super_hit['remote_ip'])
|
||||
else:
|
||||
dictionary = referers
|
||||
if r['is_page']:
|
||||
key = 'pages'
|
||||
else:
|
||||
key = 'hits'
|
||||
if not uri in dictionary: dictionary[uri] = {'pages':0, 'hits':0}
|
||||
dictionary[uri][key] += 1
|
||||
month_stats = self.iwla.getMonthStats()
|
||||
referers = month_stats.get('referers', {})
|
||||
robots_referers = month_stats.get('robots_referers', {})
|
||||
search_engine_referers = month_stats.get('search_engine_referers', {})
|
||||
key_phrases = month_stats.get('key_phrases', {})
|
||||
|
||||
top_referers = [(k, referers[k]['pages']) for k in referers.keys()]
|
||||
top_referers = sorted(top_referers, key=lambda t: t[1], reverse=True)
|
||||
|
|
|
@ -8,14 +8,17 @@ class IWLADisplayTopVisitors(IPlugin):
|
|||
def __init__(self, iwla):
|
||||
super(IWLADisplayTopVisitors, self).__init__(iwla)
|
||||
self.API_VERSION = 1
|
||||
self.requires = ['IWLAPostAnalysisTopVisitors']
|
||||
|
||||
def hook(self):
|
||||
stats = self.iwla.getMonthStats()
|
||||
hits = self.iwla.getValidVisitors()
|
||||
|
||||
top_bandwidth = [(k,hits[k]['bandwidth']) for k in hits.keys()]
|
||||
top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True)
|
||||
top_visitors = [hits[h[0]] for h in top_bandwidth[:10]]
|
||||
|
||||
index = self.iwla.getDisplayIndex()
|
||||
table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
|
||||
for super_hit in stats['top_visitors']:
|
||||
for super_hit in top_visitors:
|
||||
address = super_hit['remote_addr']
|
||||
if self.iwla.getConfValue('display_visitor_ip', False) and\
|
||||
super_hit.get('dns_name_replaced', False):
|
||||
|
|
121
plugins/post_analysis/referers.py
Normal file
121
plugins/post_analysis/referers.py
Normal file
|
@ -0,0 +1,121 @@
|
|||
import time
|
||||
import re
|
||||
import xml.sax.saxutils as saxutils
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
import awstats_data
|
||||
|
||||
class IWLAPostAnalysisReferers(IPlugin):
    """Post-analysis plugin that aggregates HTTP referers into the month stats.

    For every request of the current visits, the referer is classified as a
    search engine, a robot referer or a plain referer, and its page/hit
    counters are accumulated in the month stats under the keys 'referers',
    'robots_referers' and 'search_engine_referers'.  Key phrases found in
    search engine referers are counted under 'key_phrases'.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisReferers, self).__init__(iwla)
        self.API_VERSION = 1

    def _getSearchEngine(self, hashid):
        """Return the name of the engine registered with `hashid`, or None."""
        for (name, engine) in self.search_engines.items():
            for (known_hashid, _) in engine['hashid']:
                if hashid == known_hashid:
                    return name
        return None

    def _findSearchEngine(self, uri):
        """Return (name, engine) for the first engine matching `uri`, or None.

        An engine matches when one of its hashid patterns matches `uri` and
        its optional 'not_search_engine' pattern does not.
        """
        for (name, engine) in self.search_engines.items():
            if not any(hashid_re.match(uri) for (_, hashid_re) in engine['hashid']):
                continue

            # A hashid matched, but the URI may be explicitly excluded for
            # this engine: in that case keep trying the other engines.
            not_engine = engine.get('not_search_engine', None)
            if not_engine and not_engine.match(uri):
                continue

            return (name, engine)
        return None

    def load(self):
        """Compile the patterns used by hook().

        Builds self.own_domain_re from the 'domain_name' conf value and
        self.search_engines from awstats_data.  Each engine entry is a dict
        with a 'hashid' list of (hashid, compiled pattern) pairs and,
        when available, 'known_url' and 'not_search_engine' patterns.

        Returns False when 'domain_name' is not configured, True otherwise.
        """
        domain_name = self.iwla.getConfValue('domain_name', '')

        if not domain_name:
            # Parenthesized single argument: same output with the Python 2
            # print statement and the Python 3 print function.
            print('domain_name required in conf')
            return False

        self.own_domain_re = re.compile(r'.*%s.*' % (domain_name))
        self.search_engines = {}

        for (hashid, name) in awstats_data.search_engines_hashid.items():
            hashid_re = re.compile(r'.*%s.*' % (hashid))
            engine = self.search_engines.setdefault(name, {'hashid': []})
            engine['hashid'].append((hashid, hashid_re))

        for (name, known_url) in awstats_data.search_engines_knwown_url.items():
            self.search_engines[name]['known_url'] = re.compile(known_url + '(?P<key_phrase>.+)')

        for (engine, not_engine) in awstats_data.not_search_engines_keys.items():
            not_engine_re = re.compile(r'.*%s.*' % (not_engine))
            key = self._getSearchEngine(engine)
            if key:
                self.search_engines[key]['not_search_engine'] = not_engine_re

        return True

    def _extractKeyPhrase(self, key_phrase_re, parameters, key_phrases):
        """Count the key phrase found in `parameters` into `key_phrases`.

        `parameters` is split on '&'; the first parameter matched by
        `key_phrase_re` provides the phrase ('+' replaced by spaces,
        lowercased, XML entities unescaped).  Does nothing when either
        `parameters` or `key_phrase_re` is empty/None.
        """
        if not parameters or not key_phrase_re:
            return

        for p in parameters.split('&'):
            groups = key_phrase_re.match(p)
            if not groups:
                continue

            key_phrase = groups.group('key_phrase')
            key_phrase = key_phrase.replace('+', ' ').lower()
            key_phrase = saxutils.unescape(key_phrase)
            key_phrases[key_phrase] = key_phrases.get(key_phrase, 0) + 1
            # Only the first matching parameter is taken into account.
            break

    def hook(self):
        """Accumulate referer counters of the current visits in month stats."""
        start_time = time.mktime(self.iwla.getStartAnalysisTime())
        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # Start from the counters already stored for this month, if any.
        referers = month_stats.get('referers', {})
        robots_referers = month_stats.get('robots_referers', {})
        search_engine_referers = month_stats.get('search_engine_referers', {})
        key_phrases = month_stats.get('key_phrases', {})

        for super_hit in stats.values():
            for r in super_hit['requests']:
                # Requests older than the analysis start are skipped
                # (presumably already counted by a previous run - TODO confirm).
                if time.mktime(r['time_decoded']) < start_time:
                    continue
                if not r['http_referer']:
                    continue

                uri = r['extract_referer']['extract_uri']

                # Ignore referers coming from our own domain.
                if self.own_domain_re.match(uri):
                    continue

                # Stop at the first matching engine.  The previous nested
                # loops only left the inner loop, so the remaining engines
                # were then matched against the engine name instead of the
                # referer URI.
                found = self._findSearchEngine(uri)

                if found is not None:
                    # Search engine referers are grouped by engine name.
                    (uri, engine) = found
                    parameters = r['extract_referer'].get('extract_parameters', None)
                    key_phrase_re = engine.get('known_url', None)
                    self._extractKeyPhrase(key_phrase_re, parameters, key_phrases)
                    dictionary = search_engine_referers
                elif super_hit['robot']:
                    dictionary = robots_referers
                else:
                    dictionary = referers

                key = 'pages' if r['is_page'] else 'hits'
                counters = dictionary.setdefault(uri, {'pages': 0, 'hits': 0})
                counters[key] += 1

        month_stats['referers'] = referers
        month_stats['robots_referers'] = robots_referers
        month_stats['search_engine_referers'] = search_engine_referers
        month_stats['key_phrases'] = key_phrases
|
|
@ -1,15 +0,0 @@
|
|||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
class IWLAPostAnalysisTopVisitors(IPlugin):
    """Post-analysis plugin storing the ten biggest visitors by bandwidth.

    The selected visitor entries are written to the month stats under the
    'top_visitors' key, highest bandwidth first.
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopVisitors, self).__init__(iwla)
        self.API_VERSION = 1

    def hook(self):
        """Compute the top ten valid visitors and save them in month stats."""
        visitors = self.iwla.getValidVisitors()
        month_stats = self.iwla.getMonthStats()

        # Rank the visitor keys by decreasing bandwidth; the sort is stable,
        # so visitors with equal bandwidth keep their iteration order.
        ranked_keys = sorted(visitors.keys(),
                             key=lambda visitor: visitors[visitor]['bandwidth'],
                             reverse=True)

        month_stats['top_visitors'] = [visitors[visitor] for visitor in ranked_keys[:10]]
|
||||
|
Loading…
Reference in New Issue
Block a user