Add top_pages plugin
This commit is contained in:
		
							
								
								
									
										4
									
								
								conf.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								conf.py
									
									
									
									
									
								
							| @@ -16,9 +16,9 @@ DB_ROOT = './output/' | |||||||
| DISPLAY_ROOT = './output/' | DISPLAY_ROOT = './output/' | ||||||
|  |  | ||||||
| pre_analysis_hooks = ['page_to_hit', 'robots'] | pre_analysis_hooks = ['page_to_hit', 'robots'] | ||||||
| post_analysis_hooks = ['referers'] | post_analysis_hooks = ['referers', 'top_pages'] | ||||||
| # post_analysis_hooks = ['top_visitors', 'reverse_dns'] | # post_analysis_hooks = ['top_visitors', 'reverse_dns'] | ||||||
| display_hooks = ['top_visitors', 'all_visits', 'referers'] | display_hooks = ['top_visitors', 'all_visits', 'referers', 'top_pages'] | ||||||
|  |  | ||||||
| reverse_dns_timeout = 0.2 | reverse_dns_timeout = 0.2 | ||||||
| page_to_hit_conf = [r'^.+/logo/$'] | page_to_hit_conf = [r'^.+/logo/$'] | ||||||
|   | |||||||
							
								
								
									
										3
									
								
								iwla.py
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								iwla.py
									
									
									
									
									
								
							| @@ -140,9 +140,8 @@ class IWLA(object): | |||||||
|         request = hit['extract_request'] |         request = hit['extract_request'] | ||||||
|  |  | ||||||
|         if 'extract_uri' in request.keys(): |         if 'extract_uri' in request.keys(): | ||||||
|  |             uri = request['extract_uri'] = request['http_uri'] | ||||||
|         uri = request['extract_uri'] |         uri = request['extract_uri'] | ||||||
|         else: |  | ||||||
|             uri = request['http_uri'] |  | ||||||
|  |  | ||||||
|         hit['is_page'] = self.isPage(uri) |         hit['is_page'] = self.isPage(uri) | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										42
									
								
								plugins/display/top_pages.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								plugins/display/top_pages.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | |||||||
|  | import time | ||||||
|  |  | ||||||
|  | from iwla import IWLA | ||||||
|  | from iplugin import IPlugin | ||||||
|  | from display import * | ||||||
|  |  | ||||||
class IWLADisplayTopPages(IPlugin):
    """Display plugin that renders the monthly 'top_pages' statistic.

    hook() adds a table of the ten highest-count URIs to the display index,
    registers a separate HTML page listing every counted URI, and appends a
    link to that page on the index.
    """

    def __init__(self, iwla):
        super(IWLADisplayTopPages, self).__init__(iwla)
        self.API_VERSION = 1
        # Name of the plugin this one depends on; presumably checked by the
        # plugin loader so that month_stats['top_pages'] exists -- confirm.
        self.requires = ['IWLAPostAnalysisTopPages']

    def hook(self):
        """Build the index summary table and the full per-month page."""

        def by_count(item):
            # item is a (uri, count) pair taken from the stats mapping.
            return item[1]

        def make_table(rows):
            # The summary and the full listing share title and headers.
            block = DisplayHTMLBlockTable('Top Pages', ['URI', 'Entrance'])
            for (page_uri, count) in rows:
                block.appendRow([page_uri, count])
            return block

        counters = self.iwla.getMonthStats()['top_pages']
        ranked = sorted(counters.items(), key=by_count, reverse=True)

        # Index page: only the ten most requested URIs.
        index = self.iwla.getDisplayIndex()
        index.appendBlock(make_table(ranked[:10]))

        # Dedicated page: every URI, stored under '<year>/top_pages_<month>.html'.
        cur_time = self.iwla.getCurTime()
        filename = 'top_pages_%d.html' % (cur_time.tm_mon)
        page = DisplayHTMLPage(time.strftime('Top Pages - %B %Y', cur_time),
                               '%d/%s' % (cur_time.tm_year, filename))
        page.appendBlock(make_table(ranked))
        self.iwla.getDisplay().addPage(page)

        # NOTE(review): the href is the bare filename while the page path is
        # prefixed with the year -- presumably the index is written to the
        # same directory; confirm.
        link = DisplayHTMLRawBlock()
        link.setRawHTML('<a href=\'%s\'>All pages</a>' % (filename))
        index.appendBlock(link)
| @@ -70,6 +70,7 @@ class IWLAPostAnalysisReferers(IPlugin): | |||||||
|         start_time = time.mktime(start_time) |         start_time = time.mktime(start_time) | ||||||
|         stats = self.iwla.getCurrentVisists() |         stats = self.iwla.getCurrentVisists() | ||||||
|         month_stats = self.iwla.getMonthStats() |         month_stats = self.iwla.getMonthStats() | ||||||
|  |          | ||||||
|         referers = month_stats.get('referers', {}) |         referers = month_stats.get('referers', {}) | ||||||
|         robots_referers = month_stats.get('robots_referers', {}) |         robots_referers = month_stats.get('robots_referers', {}) | ||||||
|         search_engine_referers = month_stats.get('search_engine_referers', {}) |         search_engine_referers = month_stats.get('search_engine_referers', {}) | ||||||
|   | |||||||
							
								
								
									
										43
									
								
								plugins/post_analysis/top_pages.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								plugins/post_analysis/top_pages.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,43 @@ | |||||||
|  | import time | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from iwla import IWLA | ||||||
|  | from iplugin import IPlugin | ||||||
|  |  | ||||||
class IWLAPostAnalysisTopPages(IPlugin):
    """Post-analysis plugin that counts page requests per URI.

    hook() walks the current visits, skips robots, non-page hits and hits
    older than the start of the current analysis, and accumulates one count
    per URI into month_stats['top_pages'].
    """

    def __init__(self, iwla):
        super(IWLAPostAnalysisTopPages, self).__init__(iwla)
        self.API_VERSION = 1

    def load(self):
        """Compile the pattern used to fold index pages into '/'.

        Always returns True.
        """
        # NOTE(review): used with match(), so this only anchors at the start
        # of the URI, and the '.' is unescaped -- any URI beginning with
        # '/index' is folded into '/'. Confirm this is intended.
        self.index_re = re.compile(r'/index.*')
        return True

    def hook(self):
        """Update month_stats['top_pages'] with the hits of this analysis."""
        start_time = time.mktime(self.iwla.getStartAnalysisTime())

        stats = self.iwla.getCurrentVisists()
        month_stats = self.iwla.getMonthStats()

        # Continue from the counters already stored for this month, if any.
        top_pages = month_stats.get('top_pages', {})

        for (_, super_hit) in stats.items():
            if super_hit['robot']:
                continue
            for r in super_hit['requests']:
                if not r['is_page']:
                    continue

                # Ignore hits that predate the start of the current analysis.
                if time.mktime(r['time_decoded']) < start_time:
                    continue

                uri = r['extract_request']['extract_uri']
                if self.index_re.match(uri):
                    uri = '/'

                # Prefix with the server name when the hit carries one.
                uri = "%s%s" % (r.get('server_name', ''), uri)

                # dict.get avoids the list that keys() builds under Python 2,
                # which made the membership test linear in the number of URIs.
                top_pages[uri] = top_pages.get(uri, 0) + 1

        month_stats['top_pages'] = top_pages
		Reference in New Issue
	
	Block a user