Add option count_hit_only_visitors and function isValidForCurrentAnalysis()
This commit is contained in:
		
							
								
								
									
										5
									
								
								conf.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								conf.py
									
									
									
									
									
								
							| @@ -22,6 +22,5 @@ display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages'] | ||||
|  | ||||
| reverse_dns_timeout = 0.2 | ||||
| page_to_hit_conf = [r'^.+/logo/$'] | ||||
| # pre_analysis_hooks = ['H002_soutade.py', 'H001_robot.py'] | ||||
| # post_analysis_hooks = ['top_visitors.py'] | ||||
| # display_hooks = ['top_visitors.py'] | ||||
|  | ||||
| count_hit_only_visitors = False | ||||
|   | ||||
| @@ -22,3 +22,5 @@ display_hooks = [] | ||||
|  | ||||
| pages_extensions = ['/', 'html', 'xhtml', 'py', 'pl', 'rb', 'php'] | ||||
| viewed_http_codes = [200, 304] | ||||
|  | ||||
| count_hit_only_visitors = True | ||||
|   | ||||
							
								
								
									
										32
									
								
								iwla.py
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								iwla.py
									
									
									
									
									
								
							| @@ -75,6 +75,10 @@ class IWLA(object): | ||||
|     def getStartAnalysisTime(self): | ||||
|         # Return the 'start_analysis_time' entry stored in the meta information. | ||||
|         return self.meta_infos['start_analysis_time'] | ||||
|  | ||||
|     def isValidForCurrentAnalysis(self, request): | ||||
|         cur_time = self.meta_infos['start_analysis_time'] | ||||
|         return (time.mktime(cur_time) < time.mktime(request['time_decoded'])) | ||||
|  | ||||
|     def _clearMeta(self): | ||||
|         self.meta_infos = { | ||||
|             'last_time' : None | ||||
| @@ -264,15 +268,15 @@ class IWLA(object): | ||||
|         #stats['requests'] = set() | ||||
|         stats['nb_visitors'] = 0 | ||||
|  | ||||
|         for k in visits.keys(): | ||||
|             super_hit = visits[k] | ||||
|         for (k, super_hit) in visits.items(): | ||||
|             if super_hit['robot']: | ||||
|                 stats['not_viewed_bandwidth'] += super_hit['bandwidth'] | ||||
|                 continue | ||||
|  | ||||
|             #print "[%s] =>\t%d/%d" % (k, super_hit['viewed_pages'], super_hit['viewed_hits']) | ||||
|  | ||||
|             if not super_hit['hit_only']: | ||||
|             if conf.count_hit_only_visitors or\ | ||||
|                     super_hit['viewed_pages']: | ||||
|                 stats['nb_visitors'] += 1 | ||||
|             stats['viewed_bandwidth'] += super_hit['bandwidth'] | ||||
|             stats['viewed_pages'] += super_hit['viewed_pages'] | ||||
| @@ -298,7 +302,14 @@ class IWLA(object): | ||||
|  | ||||
|         self.current_analysis['month_stats'] = stats | ||||
|  | ||||
|         self.valid_visitors = {k: v for (k,v) in visits.items() if not visits[k]['robot']} | ||||
|         self.valid_visitors = {} | ||||
|         for (k,v) in visits.items(): | ||||
|             if v['robot']: continue | ||||
|             if conf.count_hit_only_visitors and\ | ||||
|                     (not v['viewed_pages']): | ||||
|                 continue | ||||
|             self.valid_visitors[k] = v | ||||
|  | ||||
|         self._callPlugins(conf.POST_HOOK_DIRECTORY) | ||||
|  | ||||
|         path = self.getDBFilename(cur_time) | ||||
| @@ -331,9 +342,12 @@ class IWLA(object): | ||||
|                 for k in stats.keys(): | ||||
|                     stats[k] -= self.current_analysis['days_stats'][last_day][k] | ||||
|                 stats['nb_visitors'] = 0 | ||||
|                 for k in visits.keys(): | ||||
|                     if visits[k]['robot']: continue | ||||
|                     if visits[k]['last_access'].tm_mday == cur_time.tm_mday: | ||||
|                 for (k,v) in visits.items(): | ||||
|                     if v['robot']: continue | ||||
|                     if conf.count_hit_only_visitors and\ | ||||
|                             (not v['viewed_pages']): | ||||
|                         continue | ||||
|                     if v['last_access'].tm_mday == cur_time.tm_mday: | ||||
|                         stats['nb_visitors'] += 1 | ||||
|         print stats | ||||
|  | ||||
| @@ -349,7 +363,7 @@ class IWLA(object): | ||||
|             self.analyse_started = True | ||||
|         else: | ||||
|             if not self.analyse_started: | ||||
|                 if time.mktime(cur_time) >= time.mktime(t): | ||||
|                 if not self.isValidForCurrentAnalysis(hit): | ||||
|                     return False | ||||
|                 else: | ||||
|                     self.analyse_started = True | ||||
| @@ -374,7 +388,7 @@ class IWLA(object): | ||||
|         return True | ||||
|  | ||||
|     def start(self): | ||||
|         print '==> Analyse previous database' | ||||
|         print '==> Load previous database' | ||||
|  | ||||
|         self.meta_infos = self._deserialize(conf.META_PATH) or self._clearMeta() | ||||
|         if self.meta_infos['last_time']: | ||||
|   | ||||
| @@ -11,6 +11,8 @@ class IWLADisplayAllVisits(IPlugin): | ||||
|  | ||||
|     def hook(self): | ||||
|         hits = self.iwla.getValidVisitors() | ||||
|         display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False) | ||||
|  | ||||
|         last_access = sorted(hits.values(), key=lambda t: t['last_access'], reverse=True) | ||||
|  | ||||
|         cur_time = self.iwla.getCurTime() | ||||
| @@ -23,7 +25,7 @@ class IWLADisplayAllVisits(IPlugin): | ||||
|         table = DisplayHTMLBlockTable('Last seen', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen']) | ||||
|         for super_hit in last_access: | ||||
|             address = super_hit['remote_addr'] | ||||
|             if self.iwla.getConfValue('display_visitor_ip', False) and\ | ||||
|             if display_visitor_ip and\ | ||||
|                     super_hit.get('dns_name_replaced', False): | ||||
|                 address = '%s [%s]' % (address, super_hit['remote_ip']) | ||||
|  | ||||
|   | ||||
| @@ -91,7 +91,6 @@ class IWLADisplayReferers(IPlugin): | ||||
|         index.appendBlock(table) | ||||
|  | ||||
|         # All key phrases in a file | ||||
|         cur_time = self.iwla.getCurTime() | ||||
|         title = time.strftime('Key Phrases - %B %Y', cur_time) | ||||
|  | ||||
|         filename = 'key_phrases_%d.html' % (cur_time.tm_mon) | ||||
|   | ||||
| @@ -11,8 +11,11 @@ class IWLADisplayTopVisitors(IPlugin): | ||||
|  | ||||
|     def hook(self): | ||||
|         hits = self.iwla.getValidVisitors() | ||||
|         count_hit_only = self.iwla.getConfValue('count_hit_only_visitors', False) | ||||
|         display_visitor_ip = self.iwla.getConfValue('display_visitor_ip', False) | ||||
|  | ||||
|         top_bandwidth = [(k,hits[k]['bandwidth']) for k in hits.keys()] | ||||
|         top_bandwidth = [(k,v['bandwidth']) for (k,v) in hits.items() \ | ||||
|                              if count_hit_only or v['viewed_pages']] | ||||
|         top_bandwidth = sorted(top_bandwidth, key=lambda t: t[1], reverse=True) | ||||
|         top_visitors = [hits[h[0]] for h in top_bandwidth[:10]] | ||||
|  | ||||
| @@ -20,7 +23,7 @@ class IWLADisplayTopVisitors(IPlugin): | ||||
|         table = DisplayHTMLBlockTable('Top visitors', ['Host', 'Pages', 'Hits', 'Bandwidth', 'Last seen']) | ||||
|         for super_hit in top_visitors: | ||||
|             address = super_hit['remote_addr'] | ||||
|             if self.iwla.getConfValue('display_visitor_ip', False) and\ | ||||
|             if display_visitor_ip and\ | ||||
|                     super_hit.get('dns_name_replaced', False): | ||||
|                 address = '%s [%s]' % (address, super_hit['remote_ip']) | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| import time | ||||
| import re | ||||
| import xml.sax.saxutils as saxutils | ||||
|  | ||||
| @@ -66,8 +65,6 @@ class IWLAPostAnalysisReferers(IPlugin): | ||||
|                 break | ||||
|  | ||||
|     def hook(self): | ||||
|         start_time = self.iwla.getStartAnalysisTime() | ||||
|         start_time = time.mktime(start_time) | ||||
|         stats = self.iwla.getCurrentVisists() | ||||
|         month_stats = self.iwla.getMonthStats() | ||||
|          | ||||
| @@ -78,7 +75,7 @@ class IWLAPostAnalysisReferers(IPlugin): | ||||
|  | ||||
|         for (k, super_hit) in stats.items(): | ||||
|             for r in super_hit['requests']: | ||||
|                 if time.mktime(r['time_decoded']) < start_time: continue | ||||
|                 if not self.iwla.isValidForCurrentAnalysis(r): continue | ||||
|                 if not r['http_referer']: continue | ||||
|                  | ||||
|                 uri = r['extract_referer']['extract_uri'] | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| import time | ||||
| import re | ||||
|  | ||||
| from iwla import IWLA | ||||
| @@ -14,9 +13,6 @@ class IWLAPostAnalysisTopPages(IPlugin): | ||||
|         return True | ||||
|          | ||||
|     def hook(self): | ||||
|         start_time = self.iwla.getStartAnalysisTime() | ||||
|         start_time = time.mktime(start_time) | ||||
|  | ||||
|         stats = self.iwla.getCurrentVisists() | ||||
|         month_stats = self.iwla.getMonthStats() | ||||
|          | ||||
| @@ -27,7 +23,7 @@ class IWLAPostAnalysisTopPages(IPlugin): | ||||
|             for r in super_hit['requests']: | ||||
|                 if not r['is_page']: continue | ||||
|  | ||||
|                 if time.mktime(r['time_decoded']) < start_time: continue | ||||
|                 if not self.iwla.isValidForCurrentAnalysis(r): continue | ||||
|  | ||||
|                 uri = r['extract_request']['extract_uri'] | ||||
|                 if self.index_re.match(uri): | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from iwla import IWLA | ||||
| from iplugin import IPlugin | ||||
| @@ -21,21 +20,18 @@ class IWLAPreAnalysisPageToHit(IPlugin): | ||||
|         return True | ||||
|  | ||||
|     def hook(self): | ||||
|         start_time = self.iwla.getStartAnalysisTime() | ||||
|         start_time = time.mktime(start_time) | ||||
|  | ||||
|         hits = self.iwla.getCurrentVisists() | ||||
|         viewed_http_codes = self.iwla.getConfValue('viewed_http_codes', [200, 304]) | ||||
|         for (k, super_hit) in hits.items(): | ||||
|             if super_hit['robot']: continue | ||||
|  | ||||
|             for p in super_hit['requests']: | ||||
|                 if not p['is_page']: continue | ||||
|                 if time.mktime(p['time_decoded']) < start_time: continue | ||||
|                 uri = p['extract_request']['extract_uri'] | ||||
|                 for r in self.regexps: | ||||
|                     if r.match(uri): | ||||
|                         p['is_page'] = False | ||||
|             for request in super_hit['requests']: | ||||
|                 if not request['is_page']: continue | ||||
|                 if not self.iwla.isValidForCurrentAnalysis(request): continue | ||||
|                 uri = request['extract_request']['extract_uri'] | ||||
|                 for regexp in self.regexps: | ||||
|                     if regexp.match(uri): | ||||
|                         request['is_page'] = False | ||||
|                         super_hit['viewed_pages'] -= 1 | ||||
|                         super_hit['viewed_hits'] += 1 | ||||
|                         break | ||||
|   | ||||
| @@ -18,24 +18,23 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
| # Basic rule to detect robots | ||||
|     def hook(self): | ||||
|         hits = self.iwla.getCurrentVisists() | ||||
|         for k in hits.keys(): | ||||
|             super_hit = hits[k] | ||||
|  | ||||
|         for (k, super_hit) in hits.items(): | ||||
|             if super_hit['robot']: continue | ||||
|  | ||||
|             isRobot = False | ||||
|             referers = 0 | ||||
|  | ||||
|             first_page = super_hit['requests'][0] | ||||
|             if first_page['time_decoded'].tm_mday == super_hit['last_access'].tm_mday: | ||||
|                 for r in self.awstats_robots: | ||||
|                     if r.match(first_page['http_user_agent']): | ||||
|                         isRobot = True | ||||
|                         break | ||||
|             if not self.iwla.isValidForCurrentAnalysis(first_page): continue | ||||
|  | ||||
|                 if isRobot: | ||||
|                     super_hit['robot'] = 1 | ||||
|                     continue | ||||
|             for r in self.awstats_robots: | ||||
|                 if r.match(first_page['http_user_agent']): | ||||
|                     isRobot = True | ||||
|                     break | ||||
|  | ||||
|             if isRobot: | ||||
|                 super_hit['robot'] = 1 | ||||
|                 continue | ||||
|  | ||||
| # 1) no pages view --> robot | ||||
|             # if not super_hit['viewed_pages']: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user