Add option count_hit_only_visitors and function isValidForCurrentAnalysis()
This commit is contained in:
		| @@ -1,5 +1,4 @@ | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from iwla import IWLA | ||||
| from iplugin import IPlugin | ||||
| @@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin): | ||||
|         return True | ||||
|  | ||||
|     def hook(self): | ||||
|         start_time = self.iwla.getStartAnalysisTime() | ||||
|         start_time = time.mktime(start_time) | ||||
|  | ||||
|         hits = self.iwla.getCurrentVisists() | ||||
|         viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304]) | ||||
|         for (k, super_hit) in hits.items(): | ||||
|             if super_hit['robot']: continue | ||||
|  | ||||
|             for p in super_hit['requests']: | ||||
|                 if not p['is_page']: continue | ||||
|                 if time.mktime(p['time_decoded']) < start_time: continue | ||||
|                 uri = p['extract_request']['extract_uri'] | ||||
|                 for r in self.regexps: | ||||
|                     if r.match(uri): | ||||
|                         p['is_page'] = False | ||||
|             for request in super_hit['requests']: | ||||
|                 if not request['is_page']: continue | ||||
|                 if not self.iwla.isValidForCurrentAnalysis(request): continue | ||||
|                 uri = request['extract_request']['extract_uri'] | ||||
|                 for regexp in self.regexps: | ||||
|                     if regexp.match(uri): | ||||
|                         request['is_page'] = False | ||||
|                         super_hit['viewed_pages'] -= 1 | ||||
|                         super_hit['viewed_hits'] += 1 | ||||
|                         break | ||||
|   | ||||
| @@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
| # Basic rule to detect robots | ||||
|     def hook(self): | ||||
|         hits = self.iwla.getCurrentVisists() | ||||
|         for k in hits.keys(): | ||||
|             super_hit = hits[k] | ||||
|  | ||||
|         for (k, super_hit) in hits.items(): | ||||
|             if super_hit['robot']: continue | ||||
|  | ||||
|             isRobot = False | ||||
|             referers = 0 | ||||
|  | ||||
|             first_page = super_hit['requests'][0] | ||||
|             if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday: | ||||
|                 for r in self.awstats_robots: | ||||
|                     if r.match(first_page['http_user_agent']): | ||||
|                         isRobot = True | ||||
|                         break | ||||
|             if not self.iwla.isValidForCurrentAnalysis(first_page): continue | ||||
|  | ||||
|                 if isRobot: | ||||
|                     super_hit['robot'] = 1 | ||||
|                     continue | ||||
|             for r in self.awstats_robots: | ||||
|                 if r.match(first_page['http_user_agent']): | ||||
|                     isRobot = True | ||||
|                     break | ||||
|  | ||||
|             if isRobot: | ||||
|                 super_hit['robot'] = 1 | ||||
|                 continue | ||||
|  | ||||
| # 1) no page views --> robot | ||||
|             # if not super_hit['viewed_pages']: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user