Add option count_hit_only_visitors and function isValidForCurrentAnalysis()

This commit is contained in:
Grégory Soutadé
2014-11-27 09:01:51 +01:00
parent 6b0ed18f35
commit dd8349ab08
10 changed files with 54 additions and 47 deletions

View File

@@ -11,6 +11,8 @@ class IWLADisplayAllVisits(IPlugin):
def hook(self):
hits = self.iwla.getValidVisitors()
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
last_access = sorted(hits.values(), key=lambda t: t['last_access'], reverse=True)
cur_time = self.iwla.getCurTime()
@@ -23,7 +25,7 @@ class IWLADisplayAllVisits(IPlugin):
table = DisplayHTMLBlockTable('Last seen', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
for super_hit in last_access:
address = super_hit['remote_addr']
if self.iwla.getConfValue('display_visitor_ip', False) and\
if display_visitor_ip and\
super_hit.get('dns_name_replaced', False):
address = '%s [%s]' % (address, super_hit['remote_ip'])

View File

@@ -91,7 +91,6 @@ class IWLADisplayReferers(IPlugin):
index.appendBlock(table)
# All key phrases in a file
cur_time = self.iwla.getCurTime()
title = time.strftime('Key Phrases - %B %Y', cur_time)
filename = 'key_phrases_%d.html' % (cur_time.tm_mon)

View File

@@ -11,8 +11,11 @@ class IWLADisplayTopVisitors(IPlugin):
def hook(self):
hits = self.iwla.getValidVisitors()
count_hit_only = self.iwla.getConfValue('count_hit_only_visitors', False)
display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False)
top_bandwidth = [(k,hits[k]['bandwidth']) for k in hits.keys()]
top_bandwidth = [(k,v['bandwidth']) for (k,v) in hits.items() \
if count_hit_only or v['viewed_pages']]
top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True)
top_visitors = [hits[h[0]] for h in top_bandwidth[:10]]
@@ -20,7 +23,7 @@ class IWLADisplayTopVisitors(IPlugin):
table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen'])
for super_hit in top_visitors:
address = super_hit['remote_addr']
if self.iwla.getConfValue('display_visitor_ip', False) and\
if display_visitor_ip and\
super_hit.get('dns_name_replaced', False):
address = '%s [%s]' % (address, super_hit['remote_ip'])

View File

@@ -1,4 +1,3 @@
import time
import re
import xml.sax.saxutils as saxutils
@@ -66,8 +65,6 @@ class IWLAPostAnalysisReferers(IPlugin):
break
def hook(self):
start_time = self.iwla.getStartAnalysisTime()
start_time = time.mktime(start_time)
stats = self.iwla.getCurrentVisists()
month_stats = self.iwla.getMonthStats()
@@ -78,7 +75,7 @@ class IWLAPostAnalysisReferers(IPlugin):
for (k, super_hit) in stats.items():
for r in super_hit['requests']:
if time.mktime(r['time_decoded']) < start_time: continue
if not self.iwla.isValidForCurrentAnalysis(r): continue
if not r['http_referer']: continue
uri = r['extract_referer']['extract_uri']

View File

@@ -1,4 +1,3 @@
import time
import re
from iwla import IWLA
@@ -14,9 +13,6 @@ class IWLAPostAnalysisTopPages(IPlugin):
return True
def hook(self):
start_time = self.iwla.getStartAnalysisTime()
start_time = time.mktime(start_time)
stats = self.iwla.getCurrentVisists()
month_stats = self.iwla.getMonthStats()
@@ -27,7 +23,7 @@ class IWLAPostAnalysisTopPages(IPlugin):
for r in super_hit['requests']:
if not r['is_page']: continue
if time.mktime(r['time_decoded']) < start_time: continue
if not self.iwla.isValidForCurrentAnalysis(r): continue
uri = r['extract_request']['extract_uri']
if self.index_re.match(uri):

View File

@@ -1,5 +1,4 @@
import re
import time
from iwla import IWLA
from iplugin import IPlugin
@@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin):
return True
def hook(self):
start_time = self.iwla.getStartAnalysisTime()
start_time = time.mktime(start_time)
hits = self.iwla.getCurrentVisists()
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304])
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
for p in super_hit['requests']:
if not p['is_page']: continue
if time.mktime(p['time_decoded']) < start_time: continue
uri = p['extract_request']['extract_uri']
for r in self.regexps:
if r.match(uri):
p['is_page'] = False
for request in super_hit['requests']:
if not request['is_page']: continue
if not self.iwla.isValidForCurrentAnalysis(request): continue
uri = request['extract_request']['extract_uri']
for regexp in self.regexps:
if regexp.match(uri):
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
break

View File

@@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin):
# Basic rule to detect robots
def hook(self):
hits = self.iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
isRobot = False
referers = 0
first_page = super_hit['requests'][0]
if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):
isRobot = True
break
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
if isRobot:
super_hit['robot'] = 1
continue
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):
isRobot = True
break
if isRobot:
super_hit['robot'] = 1
continue
# 1) no page views --> robot
# if not super_hit['viewed_pages']: