#DB_ROOT = './output_db'
#DISPLAY_ROOT = './output_dev'

# Web server logs
analyzed_filename = '/var/log/apache2/soutade.fr_access.log.1,/var/log/apache2/soutade.fr_access.log'

# Domain name to analyze
domain_name = 'soutade.fr'

# Display visitor IP in addition to resolved names
display_visitor_ip = True

# Hooks used
pre_analysis_hooks = ['page_to_hit', 'robots']
post_analysis_hooks = ['reverse_dns', 'referers', 'top_pages', 'subdomains',
                       'top_downloads', 'operating_systems', 'browsers',
                       'hours_stats', 'feeds', 'ip_to_geo', 'filter_users']
display_hooks = ['filter_users', 'top_visitors', 'all_visits', 'referers',
                 'top_pages', 'subdomains', 'top_downloads', 'referers_diff',
                 'ip_to_geo', 'operating_systems', 'browsers', 'feeds',
                 'hours_stats', 'top_downloads_diff', 'robot_bandwidth',
                 'top_pages_diff', 'all_visits_enlight']

# Reverse DNS timeout
reverse_dns_timeout = 0.2

# Count these addresses as hits
page_to_hit_conf = [r'.+/logo[/]?', r'.+/.+\.py']

# Count these addresses as pages
hit_to_page_conf = [
    # Blog
    r'.+/category/.+',
    r'.+/tag/.+',
    r'.+/archive/.+',
    r'.+/ljdc[/]?',
    r'.*/search/.+',
    # Indefero
    r'.+/source/tree/.*',
    r'.+/source/file/.*',
    r'.*/index$',
    # Denote
    r'.*/edit$',
    r'.*/add$',
    r'.+/[0-9]+$',
    r'.*/preferences$',
    r'.*/search$',
    r'.*/public_notes$',
    r'.*/template.*',
    r'.*/templates$',
    # Music
    r'.*/music/.*',
]

# Building the HTML takes too long when there are too many entries
max_hits_displayed = 100
max_downloads_displayed = 100

# French locale
locale = 'fr'

# Filtered IPs
filtered_ip = ['82.232.68.211', '78.153.243.190', '176.152.215.133',
               '83.199.87.88',   # Lannion
               '193.136.115.1'   # Lisbon
               ]

import re

# google_re = re.compile('.*google.*')
# duck_re = re.compile('.*duckduckgo.*')
soutade_re = re.compile('.*soutade.fr.*')

def my_filter(iwla, visitor):
    # Manage filtered users
    if visitor.get('filtered', False):
        return True

    filtered = False
    req = visitor['requests'][0]
    if visitor.get('country_code', '') == 'fr' and \
       req['server_name'] in ('blog.soutade.fr', 'www.soutade.fr', 'soutade.fr') and \
       req['extract_request']['extract_uri'] in ('/', '/index.html', '/about.html'):
        referer = req['extract_referer']['extract_uri']
        if referer in ('', '-'):
            # print(f'{req} MATCHED')
            filtered = True
        elif not soutade_re.match(referer):
            # if google_re.match(referer) or duck_re.match(referer):
            #     print(f'{req} MATCHED')
            filtered = True

    # Manage enlightened users
    if visitor.get('enlight', None) is None and not visitor.get('feed_parser', False):
        enlight = False
        for i, req in enumerate(visitor['requests']):
            # Note: membership tests need one-element tuples ('x',);
            # a bare ('x') is just a string, and `in` would do a substring match
            if i == 0 and req['server_name'] in ('indefero.soutade.fr',):
                break
            if req['server_name'] in ('blog.soutade.fr',) and \
               req['extract_request']['extract_uri'] in ('/', '/index.html'):
                enlight = True
                break
        visitor['enlight'] = enlight

    return filtered

filtered_users = [
    #[['country_code', '=', 'fr'], ['viewed_pages', '>=', '5'], ['viewed_hits', '>=', '5']],
    [my_filter],
    # [['country_code', '=', 'fr'], my_filter],
]

# Excluded IPs
excluded_ip = [
    r'192.168.*',    # Local
    r'117.78.58.*',  # China, ecs-117-78-58-25.compute.hwclouds-dns.com
    #'79.141.15.51',    # Elsys
    #'165.225.20.107',  # ST
    #'165.225.76.184',  # ST #2
    '147.161.180.110',  # Schneider
    '147.161.182.108',  # Schneider 2
    '147.161.182.86',   # Schneider 3
]

# Feed URLs
feeds = [r'/atom.xml', r'/rss.xml']

# Feed agent URLs
# feeds_agents = [r'.*feedly.com.*']

merge_feeds_parsers = True
merge_feeds_parsers_list = [r'ec2-.*.compute-1.amazonaws.com']

# Consider xml files as multimedia (append to the current list)
multimedia_files_append = ['xml']
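
# Hypothetical helper, not part of iwla's API: report which of the
# reclassification lists above would catch a given URI. A quick sanity
# check when tuning page_to_hit_conf / hit_to_page_conf; the check order
# below is an assumption, not necessarily the one iwla applies.
def _classify_uri(uri):
    if any(re.match(r, uri) for r in page_to_hit_conf):
        return 'hit'
    if any(re.match(r, uri) for r in hit_to_page_conf):
        return 'page'
    return 'unchanged'

# Example: _classify_uri('/blog/tag/python') -> 'page'
#          _classify_uri('/static/logo/')    -> 'hit'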
# Don't count visitors that only make a single hit (for a picture, etc.)
count_hit_only_visitors = False

# Don't build a bandwidth page for every robot (too big)
create_all_robot_bandwidth_page = False

#keep_requests = True
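
# Minimal smoke test for my_filter, runnable by executing this file directly.
# The visitor dict below is hypothetical: it carries only the fields that
# my_filter actually reads, not iwla's full visitor structure. The guard
# keeps it from running when iwla itself loads this configuration.
if __name__ == '__main__':
    visitor = {
        'country_code': 'fr',
        'requests': [{
            'server_name': 'blog.soutade.fr',
            'extract_request': {'extract_uri': '/'},
            'extract_referer': {'extract_uri': '-'},  # direct visit, no referer
        }],
    }
    # The iwla argument is unused by my_filter, so None is enough here.
    # A French visitor landing directly on the blog index gets filtered...
    assert my_filter(None, visitor) is True
    # ...and flagged as enlightened, since the first request hits the blog index
    assert visitor['enlight'] is True
    print('my_filter smoke test passed')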