| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | #DB_ROOT = './output_db' | 
					
						
							|  |  |  | #DISPLAY_ROOT = './output_dev' | 
					
						
							| 
									
										
										
										
											2014-11-20 15:25:43 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Web server log | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | analyzed_filename = '/var/log/apache2/soutade.fr_access.log.1,/var/log/apache2/soutade.fr_access.log' | 
					
						
							| 
									
										
										
										
											2014-11-20 15:25:43 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Domain name to analyze | 
					
						
							| 
									
										
										
										
											2014-11-26 16:17:16 +01:00
										 |  |  | domain_name = 'soutade.fr' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Display visitor IP in addition to resolved names | 
					
						
							| 
									
										
										
										
											2015-02-19 20:23:13 +01:00
										 |  |  | display_visitor_ip = True | 
					
						
							| 
									
										
										
										
											2014-11-26 16:17:16 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Hooks used | 
					
						
							| 
									
										
										
										
											2014-11-25 16:22:07 +01:00
										 |  |  | pre_analysis_hooks = ['page_to_hit', 'robots'] | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | post_analysis_hooks = ['reverse_dns', 'referers', 'top_pages', 'subdomains', 'top_downloads', 'operating_systems', 'browsers', 'hours_stats', 'feeds', 'ip_to_geo', 'filter_users'] | 
					
						
							|  |  |  | display_hooks = ['filter_users', 'top_visitors', 'all_visits', 'referers', 'top_pages', 'subdomains', 'top_downloads', 'referers_diff', 'ip_to_geo', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'top_downloads_diff', 'robot_bandwidth', 'top_pages_diff', 'all_visits_enlight'] | 
					
						
							| 
									
										
										
										
											2014-11-21 10:41:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Reverse DNS timeout | 
					
						
							| 
									
										
										
										
											2014-11-24 21:37:37 +01:00
										 |  |  | reverse_dns_timeout = 0.2 | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Count this addresses as hit | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | page_to_hit_conf = [r'.+/logo[/]?', r'.+/.+\.py'] | 
					
						
							| 
									
										
										
										
											2014-12-08 18:38:40 +01:00
										 |  |  | # Count this addresses as page | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | hit_to_page_conf = [ | 
					
						
							|  |  |  |     # Blog | 
					
						
							|  |  |  |     r'.+/category/.+', r'.+/tag/.+', r'.+/archive/.+', r'.+/ljdc[/]?', r'.*/search/.+', | 
					
						
							|  |  |  |     # Indefero | 
					
						
							|  |  |  |     r'.+/source/tree/.*', r'.+/source/file/.*', r'.*/index$', | 
					
						
							|  |  |  |     # Denote | 
					
						
							|  |  |  |     r'.*/edit$', r'.*/add$', r'.+/[0-9]+$', r'.*/preferences$', r'.*/search$', r'.*/public_notes$', r'.*/template.*', r'.*/templates$', | 
					
						
							|  |  |  |     # Music | 
					
						
							|  |  |  |     r'.*/music/.*', | 
					
						
							|  |  |  | ] | 
					
						
							| 
									
										
										
										
											2014-12-11 22:31:40 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Because it's too long to build HTML when there is too much entries | 
					
						
							|  |  |  | max_hits_displayed = 100 | 
					
						
							|  |  |  | max_downloads_displayed = 100 | 
					
						
							| 
									
										
										
										
											2014-12-15 21:28:25 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-29 09:32:09 +01:00
										 |  |  | # Locale in French | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | locale = 'fr' | 
					
						
							| 
									
										
										
										
											2015-02-19 20:23:13 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-03 08:58:47 +02:00
										 |  |  | # Filtered IP | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | filtered_ip = ['82.232.68.211', '78.153.243.190', '176.152.215.133', | 
					
						
							|  |  |  |                '83.199.87.88', # Lanion | 
					
						
							|  |  |  |                '193.136.115.1' # Lisbon | 
					
						
							|  |  |  |                ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | # google_re = re.compile('.*google.*') | 
					
						
							|  |  |  | # duck_re = re.compile('.*duckduckgo.*') | 
					
						
							|  |  |  | soutade_re = re.compile('.*soutade.fr.*') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def my_filter(iwla, visitor): | 
					
						
							|  |  |  |     # Manage filtered users | 
					
						
							|  |  |  |     if visitor.get('filtered', False): return True | 
					
						
							|  |  |  |     filtered = False | 
					
						
							|  |  |  |     req = visitor['requests'][0] | 
					
						
							|  |  |  |     if visitor.get('country_code', '') == 'fr' and\ | 
					
						
							|  |  |  |        req['server_name'] in ('blog.soutade.fr', 'www.soutade.fr', 'soutade.fr') and \ | 
					
						
							|  |  |  |        req['extract_request']['extract_uri'] in ('/', '/index.html', '/about.html'): | 
					
						
							|  |  |  |         referer = req['extract_referer']['extract_uri'] | 
					
						
							|  |  |  |         if referer in ('', '-'): | 
					
						
							|  |  |  |             # print(f'{req} MATCHED') | 
					
						
							|  |  |  |             filtered = True | 
					
						
							|  |  |  |         elif not soutade_re.match(referer): | 
					
						
							|  |  |  |         # if google_re.match(referer) or duck_re.match(referer): | 
					
						
							|  |  |  |             # print(f'{req} MATCHED') | 
					
						
							|  |  |  |             filtered = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Manage enlight users | 
					
						
							|  |  |  |     if visitor.get('enlight', None) is None and not visitor.get('feed_parser', False): | 
					
						
							|  |  |  |         enlight = False | 
					
						
							|  |  |  |         for i, req in enumerate(visitor['requests']): | 
					
						
							|  |  |  |             if i == 0 and req['server_name'] in ('indefero.soutade.fr'): break | 
					
						
							|  |  |  |             if req['server_name'] in ('blog.soutade.fr') and \ | 
					
						
							|  |  |  |                req['extract_request']['extract_uri'] in ('/', '/index.html'): | 
					
						
							|  |  |  |                 enlight = True | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         visitor['enlight'] = enlight | 
					
						
							|  |  |  |     return filtered | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-03 08:58:47 +02:00
										 |  |  | filtered_users = [ | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  |     #[['country_code', '=', 'fr'], ['viewed_pages', '>=', '5'], ['viewed_hits', '>=', '5']], | 
					
						
							|  |  |  |     [my_filter], | 
					
						
							|  |  |  |     # [['country_code', '=', 'fr'], my_filter], | 
					
						
							| 
									
										
										
										
											2021-06-03 08:58:47 +02:00
										 |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-09 09:36:32 +02:00
										 |  |  | # Excluded IP | 
					
						
							|  |  |  | excluded_ip = [ | 
					
						
							| 
									
										
										
										
											2021-06-03 08:58:47 +02:00
										 |  |  |     r'192.168.*', # Local | 
					
						
							|  |  |  |     r'117.78.58.*', # China ecs-117-78-58-25.compute.hwclouds-dns.com | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  |     #'79.141.15.51', # Elsys | 
					
						
							|  |  |  |     #'165.225.20.107', # ST | 
					
						
							|  |  |  |     #'165.225.76.184', # ST #2 | 
					
						
							|  |  |  |     '147.161.180.110', # Schneider | 
					
						
							|  |  |  |     '147.161.182.108', # Schneider 2 | 
					
						
							|  |  |  |     '147.161.182.86',  # Schneider 3 | 
					
						
							| 
									
										
										
										
											2020-04-09 09:36:32 +02:00
										 |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Feeds url | 
					
						
							| 
									
										
										
										
											2021-06-03 08:58:47 +02:00
										 |  |  | feeds = [r'/atom.xml', r'/rss.xml'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | # Feeds agent url | 
					
						
							|  |  |  | # feeds_agents = [r'.*feedly.com.*'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | merge_feeds_parsers = True | 
					
						
							|  |  |  | merge_feeds_parsers_list = [r'ec2-.*.compute-1.amazonaws.com'] | 
					
						
							| 
									
										
										
										
											2015-02-19 20:23:13 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-29 09:32:09 +01:00
										 |  |  | # Consider xml files as multimedia (append to current list) | 
					
						
							| 
									
										
										
										
											2020-05-01 09:55:42 +02:00
										 |  |  | multimedia_files_append = ['xml'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Don't count visitors that only do one hit (for a picture, ...) | 
					
						
							|  |  |  | count_hit_only_visitors = False | 
					
						
							| 
									
										
										
										
											2017-01-29 09:32:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Not all robots bandwidth (too big) | 
					
						
							|  |  |  | create_all_robot_bandwidth_page = False | 
					
						
							| 
									
										
										
										
											2023-03-25 08:11:57 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | #keep_requests = True |