From 71d8ee2113aeaef78b9d26f0acd89a6a8f3b8691 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Sat, 25 Mar 2023 08:11:57 +0100 Subject: [PATCH] Forgot Firefox icon --- conf.py | 90 +++++++++++++++++++++++------ plugins/pre_analysis/robots.py | 5 ++ resources/icon/browser/firefox.png | Bin 0 -> 250 bytes 3 files changed, 77 insertions(+), 18 deletions(-) create mode 100644 resources/icon/browser/firefox.png diff --git a/conf.py b/conf.py index 15cd519..e299e0e 100644 --- a/conf.py +++ b/conf.py @@ -1,6 +1,8 @@ +#DB_ROOT = './output_db' +#DISPLAY_ROOT = './output_dev' # Web server log -analyzed_filename = '/var/log/apache2/access.log.1,/var/log/apache2/access.log' +analyzed_filename = '/var/log/apache2/soutade.fr_access.log.1,/var/log/apache2/soutade.fr_access.log' # Domain name to analyze domain_name = 'soutade.fr' @@ -10,49 +12,99 @@ display_visitor_ip = True # Hooks used pre_analysis_hooks = ['page_to_hit', 'robots'] -post_analysis_hooks = ['referers', 'top_pages', 'top_downloads', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'reverse_dns', 'ip_to_geo'] -display_hooks = ['filter_users', 'top_visitors', 'all_visits', 'referers', 'top_pages', 'top_downloads', 'referers_diff', 'ip_to_geo', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'top_downloads_diff', 'robot_bandwidth', 'top_pages_diff'] +post_analysis_hooks = ['reverse_dns', 'referers', 'top_pages', 'subdomains', 'top_downloads', 'operating_systems', 'browsers', 'hours_stats', 'feeds', 'ip_to_geo', 'filter_users'] +display_hooks = ['filter_users', 'top_visitors', 'all_visits', 'referers', 'top_pages', 'subdomains', 'top_downloads', 'referers_diff', 'ip_to_geo', 'operating_systems', 'browsers', 'feeds', 'hours_stats', 'top_downloads_diff', 'robot_bandwidth', 'top_pages_diff', 'all_visits_enlight'] # Reverse DNS timeout reverse_dns_timeout = 0.2 # Count this addresses as hit -page_to_hit_conf = [r'^.+/logo[/]?$'] +page_to_hit_conf = [r'.+/logo[/]?', r'.+/.+\.py'] # Count this addresses as page -hit_to_page_conf = [r'^.+/category/.+$', r'^.+/tag/.+$', r'^.+/archive/.+$', r'^.+/ljdc[/]?$', r'^.+/source/tree/.*$', r'^.+/source/file/.*$', r'^.+/search/.+$'] +hit_to_page_conf = [ + # Blog + r'.+/category/.+', r'.+/tag/.+', r'.+/archive/.+', r'.+/ljdc[/]?', r'.*/search/.+', + # Indefero + r'.+/source/tree/.*', r'.+/source/file/.*', r'.*/index$', + # Denote + r'.*/edit$', r'.*/add$', r'.+/[0-9]+$', r'.*/preferences$', r'.*/search$', r'.*/public_notes$', r'.*/template.*', r'.*/templates$', + # Music + r'.*/music/.*', +] # Because it's too long to build HTML when there is too much entries max_hits_displayed = 100 max_downloads_displayed = 100 -# Compressed files -compress_output_files = ['html', 'css', 'js'] - # Locale in French -#locale = 'fr' - -# Tracked IP -tracked_ip = ['192.168.1.1'] +locale = 'fr' # Filtered IP -filtered_ip = [ - # r'192.168.*', # Local -] +filtered_ip = ['82.232.68.211', '78.153.243.190', '176.152.215.133', + '83.199.87.88', # Lanion + '193.136.115.1' # Lisbon + ] + +import re +# google_re = re.compile('.*google.*') +# duck_re = re.compile('.*duckduckgo.*') +soutade_re = re.compile('.*soutade.fr.*') + +def my_filter(iwla, visitor): + # Manage filtered users + if visitor.get('filtered', False): return True + filtered = False + req = visitor['requests'][0] + if visitor.get('country_code', '') == 'fr' and\ + req['server_name'] in ('blog.soutade.fr', 'www.soutade.fr', 'soutade.fr') and \ + req['extract_request']['extract_uri'] in ('/', '/index.html', '/about.html'): + referer = req['extract_referer']['extract_uri'] + if referer in ('', '-'): + # print(f'{req} MATCHED') + filtered = True + elif not soutade_re.match(referer): + # if google_re.match(referer) or duck_re.match(referer): + # print(f'{req} MATCHED') + filtered = True + + # Manage enlight users + if visitor.get('enlight', None) is None and not visitor.get('feed_parser', False): + enlight = False + for i, req in enumerate(visitor['requests']): + if i == 0 and req['server_name'] in ('indefero.soutade.fr'): break + if req['server_name'] in ('blog.soutade.fr') and \ + req['extract_request']['extract_uri'] in ('/', '/index.html'): + enlight = True + break + visitor['enlight'] = enlight + return filtered + filtered_users = [ -# [['country_code', '=', 'cn'], ['viewed_pages', '>=', '100']], + #[['country_code', '=', 'fr'], ['viewed_pages', '>=', '5'], ['viewed_hits', '>=', '5']], + [my_filter], + # [['country_code', '=', 'fr'], my_filter], ] # Excluded IP excluded_ip = [ r'192.168.*', # Local r'117.78.58.*', # China ecs-117-78-58-25.compute.hwclouds-dns.com + #'79.141.15.51', # Elsys + #'165.225.20.107', # ST + #'165.225.76.184', # ST #2 + '147.161.180.110', # Schneider + '147.161.182.108', # Schneider 2 + '147.161.182.86', # Schneider 3 ] # Feeds url feeds = [r'/atom.xml', r'/rss.xml'] -# Feeds referers url -feeds_referers = ['https://feedly.com'] +# Feeds agent url +# feeds_agents = [r'.*feedly.com.*'] + +merge_feeds_parsers = True +merge_feeds_parsers_list = [r'ec2-.*.compute-1.amazonaws.com'] # Consider xml files as multimedia (append to current list) multimedia_files_append = ['xml'] @@ -62,3 +114,5 @@ count_hit_only_visitors = False # Not all robots bandwidth (too big) create_all_robot_bandwidth_page = False + +#keep_requests = True diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 678ce13..9367b6e 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -125,6 +125,11 @@ class IWLAPreAnalysisRobots(IPlugin): # 2) Less than 1 hit per page if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]): isRobot = True + for hit in super_hit['requests']: + if hit['server_name'] == 'indefero.soutade.fr': + if super_hit['viewed_hits'][0]*3 >= super_hit['viewed_pages'][0]: + isRobot = False + break if isRobot: self._setRobot(k, super_hit) diff --git a/resources/icon/browser/firefox.png b/resources/icon/browser/firefox.png new file mode 100644 index 0000000000000000000000000000000000000000..1cdd1d2d899925171dcb3b56531a3faec2349029 GIT binary patch literal 250 zcmeAS@N?(olHy`uVBq!ia0vp^d?3uh3?wzC-F*zC3<7*YTz~)mf9>4qPai(aSiIe1 z%KrHi+P?2}cs14d@!GUCwO-~9QS7A~UslU4_EIYFbH5YHxAjhL2~aO%NswPKgTu2M zX+Tber;B4q#Vy%$Cm9(OIhY;znmfAwmhawBY1qQ