iwla/conf.py

119 lines
4.0 KiB
Python
Raw Permalink Normal View History

2023-03-25 08:11:57 +01:00
#DB_ROOT = './output_db'
#DISPLAY_ROOT = './output_dev'
2014-11-20 15:25:43 +01:00
2014-12-08 18:38:40 +01:00
# Web server log
2023-03-25 08:11:57 +01:00
analyzed_filename = '/var/log/apache2/soutade.fr_access.log.1,/var/log/apache2/soutade.fr_access.log'
2014-11-20 15:25:43 +01:00
2014-12-08 18:38:40 +01:00
# Domain name to analyze
2014-11-26 16:17:16 +01:00
domain_name = 'soutade.fr'
2014-12-08 18:38:40 +01:00
# Display visitor IP in addition to resolved names
2015-02-19 20:23:13 +01:00
display_visitor_ip = True
2014-11-26 16:17:16 +01:00
2014-12-08 18:38:40 +01:00
# Hooks used
2014-11-25 16:22:07 +01:00
pre_analysis_hooks = ['page_to_hit', 'robots']
2023-03-25 08:11:57 +01:00
# Post-analysis hooks, kept in the order they are listed here.
post_analysis_hooks = [
    'reverse_dns',
    'referers',
    'top_pages',
    'subdomains',
    'top_downloads',
    'operating_systems',
    'browsers',
    'hours_stats',
    'feeds',
    'ip_to_geo',
    'filter_users',
]

# Display hooks, kept in the order they are listed here.
display_hooks = [
    'filter_users',
    'top_visitors',
    'all_visits',
    'referers',
    'top_pages',
    'subdomains',
    'top_downloads',
    'referers_diff',
    'ip_to_geo',
    'operating_systems',
    'browsers',
    'feeds',
    'hours_stats',
    'top_downloads_diff',
    'robot_bandwidth',
    'top_pages_diff',
    'all_visits_enlight',
]
2014-11-21 10:41:29 +01:00
2014-12-08 18:38:40 +01:00
# Reverse DNS timeout
2014-11-24 21:37:37 +01:00
reverse_dns_timeout = 0.2
2014-12-08 18:38:40 +01:00
# Count these addresses as hits
2023-03-25 08:11:57 +01:00
page_to_hit_conf = [r'.+/logo[/]?', r'.+/.+\.py']
2014-12-08 18:38:40 +01:00
# Count these addresses as pages
2023-03-25 08:11:57 +01:00
# URI patterns, grouped by the web application they belong to.
hit_to_page_conf = [
    # Blog
    r'.+/category/.+',
    r'.+/tag/.+',
    r'.+/archive/.+',
    r'.+/ljdc[/]?',
    r'.*/search/.+',
    # Indefero
    r'.+/source/tree/.*',
    r'.+/source/file/.*',
    r'.*/index$',
    # Denote
    r'.*/edit$',
    r'.*/add$',
    r'.+/[0-9]+$',
    r'.*/preferences$',
    r'.*/search$',
    r'.*/public_notes$',
    r'.*/template.*',
    r'.*/templates$',
    # Music
    r'.*/music/.*',
]
# Limit the number of displayed entries: building the HTML takes too long when there are too many
max_hits_displayed = 100
max_downloads_displayed = 100
2014-12-15 21:28:25 +01:00
2017-01-29 09:32:09 +01:00
# Locale in French
2023-03-25 08:11:57 +01:00
locale = 'fr'
2015-02-19 20:23:13 +01:00
2021-06-03 08:58:47 +02:00
# Filtered IP
2023-03-25 08:11:57 +01:00
# One address per line; trailing comments give the location where known.
filtered_ip = [
    '82.232.68.211',
    '78.153.243.190',
    '176.152.215.133',
    '83.199.87.88',   # Lanion
    '193.136.115.1',  # Lisbon
]
import re
# google_re = re.compile('.*google.*')
# duck_re = re.compile('.*duckduckgo.*')
# Matches referers that mention the site's own domain. The dot is escaped so
# that only a literal '.' matches (previously 'soutadeXfr' matched as well).
soutade_re = re.compile(r'.*soutade\.fr.*')


def my_filter(iwla, visitor):
    """Decide whether a visitor is filtered and tag it with an 'enlight' flag.

    A visitor that already carries a truthy 'filtered' key is reported as
    filtered immediately, without touching 'enlight'.

    Otherwise the visitor is filtered when its country code is 'fr', its
    first request targets '/', '/index.html' or '/about.html' on one of the
    main soutade.fr hosts, and the referer of that request is either empty
    ('' or '-') or does not match ``soutade_re``.

    Side effect: when 'enlight' is not set yet (missing or None) and the
    visitor is not flagged 'feed_parser', ``visitor['enlight']`` is set to
    True if any request hits '/' or '/index.html' on blog.soutade.fr, unless
    the very first request was made to indefero.soutade.fr.

    Args:
        iwla: Unused here; kept because the function is called with it.
        visitor: Dict holding at least a 'requests' list. Each request is a
            dict with 'server_name', 'extract_request' and 'extract_referer'
            entries.

    Returns:
        True when the visitor must be filtered, False otherwise.
    """
    # Manage filtered users
    if visitor.get('filtered', False):
        return True

    requests = visitor['requests']
    filtered = False
    # Guard against an empty request list instead of raising IndexError.
    if requests:
        first = requests[0]
        if (visitor.get('country_code', '') == 'fr'
                and first['server_name'] in ('blog.soutade.fr', 'www.soutade.fr', 'soutade.fr')
                and first['extract_request']['extract_uri'] in ('/', '/index.html', '/about.html')):
            referer = first['extract_referer']['extract_uri']
            # Filter direct accesses (no referer) and accesses coming from
            # anywhere other than the site itself.
            filtered = referer in ('', '-') or not soutade_re.match(referer)

    # Manage enlight users
    if visitor.get('enlight', None) is None and not visitor.get('feed_parser', False):
        enlight = False
        for i, req in enumerate(requests):
            server_name = req['server_name']
            # Compare with ==: the former "in ('host')" was a substring test
            # on a plain string, so 'soutade.fr' or '' matched too.
            if i == 0 and server_name == 'indefero.soutade.fr':
                break
            if server_name == 'blog.soutade.fr' and \
               req['extract_request']['extract_uri'] in ('/', '/index.html'):
                enlight = True
                break
        visitor['enlight'] = enlight
    return filtered
2021-06-03 08:58:47 +02:00
filtered_users = [
2023-03-25 08:11:57 +01:00
#[['country_code', '=', 'fr'], ['viewed_pages', '>=', '5'], ['viewed_hits', '>=', '5']],
[my_filter],
# [['country_code', '=', 'fr'], my_filter],
2021-06-03 08:58:47 +02:00
]
2020-04-09 09:36:32 +02:00
# Excluded IP
excluded_ip = [
2021-06-03 08:58:47 +02:00
r'192.168.*', # Local
r'117.78.58.*', # China ecs-117-78-58-25.compute.hwclouds-dns.com
2023-03-25 08:11:57 +01:00
#'79.141.15.51', # Elsys
#'165.225.20.107', # ST
#'165.225.76.184', # ST #2
'147.161.180.110', # Schneider
'147.161.182.108', # Schneider 2
'147.161.182.86', # Schneider 3
2020-04-09 09:36:32 +02:00
]
# Feeds url
2021-06-03 08:58:47 +02:00
feeds = [r'/atom.xml', r'/rss.xml']
2023-03-25 08:11:57 +01:00
# Feeds agent url
# feeds_agents = [r'.*feedly.com.*']
merge_feeds_parsers = True
merge_feeds_parsers_list = [r'ec2-.*.compute-1.amazonaws.com']
2015-02-19 20:23:13 +01:00
2017-01-29 09:32:09 +01:00
# Consider xml files as multimedia (append to current list)
2020-05-01 09:55:42 +02:00
multimedia_files_append = ['xml']
# Don't count visitors that only do one hit (for a picture, ...)
count_hit_only_visitors = False
2017-01-29 09:32:09 +01:00
# Don't create the page listing bandwidth for all robots (too big)
create_all_robot_bandwidth_page = False
2023-03-25 08:11:57 +01:00
#keep_requests = True