Do a more generic plugin : page_to_hit
This commit is contained in:
parent
d5db763b48
commit
7405cf237a
3
conf.py
3
conf.py
|
@ -11,11 +11,12 @@ analyzed_filename = 'access.log'
|
|||
DB_ROOT = './output/'
|
||||
DISPLAY_ROOT = './output/'
|
||||
|
||||
pre_analysis_hooks = ['soutade', 'robots']
|
||||
pre_analysis_hooks = ['page_to_hit', 'robots']
|
||||
post_analysis_hooks = ['top_visitors', 'reverse_dns']
|
||||
display_hooks = ['top_visitors']
|
||||
|
||||
reverse_dns_timeout = 0.2
|
||||
page_to_hit_conf = [r'^.+/logo/$']
|
||||
# pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py']
|
||||
# post_analysis_hooks = ['top_visitors.py']
|
||||
# display_hooks = ['top_visitors.py']
|
||||
|
|
|
@ -31,6 +31,8 @@ class IPlugin(object):
|
|||
def preloadPlugins(plugins, iwla):
|
||||
cache_plugins = {}
|
||||
|
||||
print "==> Preload plugins"
|
||||
|
||||
for root in plugins.keys():
|
||||
for plugin_filename in plugins[root]:
|
||||
plugin_path = root + '.' + plugin_filename
|
||||
|
|
13
iwla.py
13
iwla.py
|
@ -1,11 +1,8 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import glob
|
||||
import imp
|
||||
import pickle
|
||||
import gzip
|
||||
import importlib
|
||||
|
@ -126,7 +123,7 @@ class IWLA(object):
|
|||
remote_addr = hit['remote_addr']
|
||||
|
||||
if not remote_addr in self.current_analysis['visits'].keys():
|
||||
self._createUser(hit)
|
||||
self._createVisitor(hit)
|
||||
return
|
||||
|
||||
super_hit = self.current_analysis['visits'][remote_addr]
|
||||
|
@ -160,7 +157,7 @@ class IWLA(object):
|
|||
else:
|
||||
super_hit[hit_key] += 1
|
||||
|
||||
def _createUser(self, hit):
|
||||
def _createVisitor(self, hit):
|
||||
super_hit = self.current_analysis['visits'][hit['remote_addr']] = {}
|
||||
super_hit['remote_addr'] = hit['remote_addr']
|
||||
super_hit['viewed_pages'] = 0
|
||||
|
@ -347,7 +344,7 @@ class IWLA(object):
|
|||
else:
|
||||
if not self.analyse_started:
|
||||
if time.mktime(cur_time) >= time.mktime(t):
|
||||
return
|
||||
return False
|
||||
else:
|
||||
self.analyse_started = True
|
||||
if cur_time.tm_mon != t.tm_mon:
|
||||
|
@ -370,7 +367,7 @@ class IWLA(object):
|
|||
def start(self):
|
||||
self.cache_plugins = preloadPlugins(self.plugins, self)
|
||||
|
||||
print '==> Analysing log'
|
||||
print '==> Analyse previous database'
|
||||
|
||||
self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta()
|
||||
if self.meta_infos['last_time']:
|
||||
|
@ -378,6 +375,8 @@ class IWLA(object):
|
|||
else:
|
||||
self._clearVisits()
|
||||
|
||||
print '==> Analysing log'
|
||||
|
||||
with open(conf.analyzed_filename) as f:
|
||||
for l in f:
|
||||
# print "line " + l
|
||||
|
|
38
plugins/pre_analysis/page_to_hit.py
Normal file
38
plugins/pre_analysis/page_to_hit.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
import re
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
# Recount page views whose URI matches a configured regexp as plain hits
|
||||
|
||||
class IWLAPreAnalysisPageToHit(IPlugin):
    """Pre-analysis plugin that recounts selected page views as plain hits.

    The URIs to recount are given as a list of regular expression strings
    under the ``page_to_hit_conf`` configuration value.
    """

    def __init__(self, iwla):
        super(IWLAPreAnalysisPageToHit, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Read and compile the configured URI patterns.

        Returns:
            False when ``page_to_hit_conf`` is missing or empty (presumably
            so the plugin loader skips this plugin -- confirm against
            preloadPlugins), True once the patterns are compiled.
        """
        self.regexps = self.iwla.getConfValue('page_to_hit_conf', [])
        if not self.regexps:
            return False

        # Build a real list rather than using map(): the patterns are
        # iterated once per request in hook(), and a list comprehension
        # behaves the same on Python 2 and Python 3.
        self.regexps = [re.compile(r) for r in self.regexps]

        return True

    def hook(self, iwla):
        """Turn matching page requests of the current visits into hits.

        For every visitor not flagged as a robot, each request that is
        currently a page, has status 200 and has the same day of month as
        the visitor's ``last_access`` is tested against the configured
        patterns. On the first match the request is no longer a page and
        the visitor's ``viewed_pages`` / ``viewed_hits`` counters are
        adjusted accordingly.
        """
        hits = iwla.getCurrentVisists()

        for super_hit in hits.values():
            if super_hit['robot']:
                continue

            for p in super_hit['requests']:
                if not p['is_page']:
                    continue
                if int(p['status']) != 200:
                    continue
                # NOTE(review): presumably limits the adjustment to requests
                # already counted in the current day's totals -- confirm.
                if p['time_decoded'].tm_mday != super_hit['last_access'].tm_mday:
                    continue

                uri = p['extract_request']['extract_uri']
                # any() stops at the first matching pattern, so a request is
                # never recounted more than once.
                if any(r.match(uri) for r in self.regexps):
                    p['is_page'] = False
                    super_hit['viewed_pages'] -= 1
                    super_hit['viewed_hits'] += 1
|
|
@ -1,35 +0,0 @@
|
|||
import re
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
|
||||
# Count requests for the Indefero logo as hits rather than page views
|
||||
|
||||
class IWLAPreAnalysisSoutade(IPlugin):
    """Pre-analysis plugin that recounts Indefero logo requests as hits."""

    def __init__(self, iwla):
        super(IWLAPreAnalysisSoutade, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the logo URI pattern; always returns True."""
        # Pattern for the Indefero logo URI
        self.logo_re = re.compile(r'^.+/logo/$')
        return True

    def hook(self, iwla):
        """Mark matching logo page requests as hits and fix the counters.

        Only visitors not flagged as robots are considered, and only
        requests that are pages, have status 200 and share the day of
        month of the visitor's ``last_access``.
        """
        visits = iwla.getCurrentVisists()

        for visitor in visits.values():
            if visitor['robot']:
                continue

            for request in visitor['requests']:
                # Short-circuit keeps the checks in their original order.
                is_logo_page = (
                    request['is_page']
                    and int(request['status']) == 200
                    and request['time_decoded'].tm_mday
                    == visitor['last_access'].tm_mday
                    and self.logo_re.match(
                        request['extract_request']['extract_uri'])
                )
                if is_logo_page:
                    request['is_page'] = False
                    visitor['viewed_pages'] -= 1
                    visitor['viewed_hits'] += 1
|
Loading…
Reference in New Issue
Block a user