Add option count_hit_only_visitors and function isValidForCurrentAnalysis()
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import time
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
@@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
return True
|
||||
|
||||
def hook(self):
|
||||
start_time = self.iwla.getStartAnalysisTime()
|
||||
start_time = time.mktime(start_time)
|
||||
|
||||
hits = self.iwla.getCurrentVisists()
|
||||
viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304])
|
||||
for (k, super_hit) in hits.items():
|
||||
if super_hit['robot']: continue
|
||||
|
||||
for p in super_hit['requests']:
|
||||
if not p['is_page']: continue
|
||||
if time.mktime(p['time_decoded']) < start_time: continue
|
||||
uri = p['extract_request']['extract_uri']
|
||||
for r in self.regexps:
|
||||
if r.match(uri):
|
||||
p['is_page'] = False
|
||||
for request in super_hit['requests']:
|
||||
if not request['is_page']: continue
|
||||
if not self.iwla.isValidForCurrentAnalysis(request): continue
|
||||
uri = request['extract_request']['extract_uri']
|
||||
for regexp in self.regexps:
|
||||
if regexp.match(uri):
|
||||
request['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
break
|
||||
|
||||
@@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||
# Basic rule to detect robots
|
||||
def hook(self):
|
||||
hits = self.iwla.getCurrentVisists()
|
||||
for k in hits.keys():
|
||||
super_hit = hits[k]
|
||||
|
||||
for (k, super_hit) in hits.items():
|
||||
if super_hit['robot']: continue
|
||||
|
||||
isRobot = False
|
||||
referers = 0
|
||||
|
||||
first_page = super_hit['requests'][0]
|
||||
if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday:
|
||||
for r in self.awstats_robots:
|
||||
if r.match(first_page['http_user_agent']):
|
||||
isRobot = True
|
||||
break
|
||||
if not self.iwla.isValidForCurrentAnalysis(first_page): continue
|
||||
|
||||
if isRobot:
|
||||
super_hit['robot'] = 1
|
||||
continue
|
||||
for r in self.awstats_robots:
|
||||
if r.match(first_page['http_user_agent']):
|
||||
isRobot = True
|
||||
break
|
||||
|
||||
if isRobot:
|
||||
super_hit['robot'] = 1
|
||||
continue
|
||||
|
||||
# 1) no pages view --> robot
|
||||
# if not super_hit['viewed_pages']:
|
||||
|
||||
Reference in New Issue
Block a user