Add option count_hit_only_visitors and function isValidForCurrentAnalysis()

This commit is contained in:
Grégory Soutadé
2014-11-27 09:01:51 +01:00
parent 6b0ed18f35
commit dd8349ab08
10 changed files with 54 additions and 47 deletions

View File

@@ -1,5 +1,4 @@
import re
import time
from iwla import IWLA
from iplugin import IPlugin
@@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin):
return True
def hook(self):
start_time = self.iwla.getStartAnalysisTime()
start_time = time.mktime(start_time)
hits = self.iwla.getCurrentVisists()
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304])
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
for p in super_hit['requests']:
if not p['is_page']: continue
if time.mktime(p['time_decoded']) < start_time: continue
uri = p['extract_request']['extract_uri']
for r in self.regexps:
if r.match(uri):
p['is_page'] = False
for request in super_hit['requests']:
if not request['is_page']: continue
if not self.iwla.isValidForCurrentAnalysis(request): continue
uri = request['extract_request']['extract_uri']
for regexp in self.regexps:
if regexp.match(uri):
request['is_page'] = False
super_hit['viewed_pages'] -= 1
super_hit['viewed_hits'] += 1
break

View File

@@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin):
# Basic rule to detect robots
def hook(self):
hits = self.iwla.getCurrentVisists()
for k in hits.keys():
super_hit = hits[k]
for (k, super_hit) in hits.items():
if super_hit['robot']: continue
isRobot = False
referers = 0
first_page = super_hit['requests'][0]
if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):
isRobot = True
break
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
if isRobot:
super_hit['robot'] = 1
continue
for r in self.awstats_robots:
if r.match(first_page['http_user_agent']):
isRobot = True
break
if isRobot:
super_hit['robot'] = 1
continue
# 1) no page views --> robot
# if not super_hit['viewed_pages']: