Add no_referrer_domains list to default_conf for websites that define this policy

This commit is contained in:
Gregory Soutade 2024-01-30 11:20:19 +01:00
parent f1ffbe40d8
commit 974d355dd4
2 changed files with 11 additions and 8 deletions

View File

@ -66,3 +66,6 @@ keep_requests = False
# Domain names that should be ignored # Domain names that should be ignored
excluded_domain_name = [] excluded_domain_name = []
# Domains that set no-referrer as Referrer-Policy
no_referrer_domains = []

View File

@ -36,7 +36,8 @@ Plugin requirements :
None None
Conf values needed : Conf values needed :
None count_hit_only_visitors
no_referrer_domains
Output files : Output files :
None None
@ -63,6 +64,7 @@ class IWLAPreAnalysisRobots(IPlugin):
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*') self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
self.one_hit_only = self.iwla.getConfValue('count_hit_only_visitors', False) self.one_hit_only = self.iwla.getConfValue('count_hit_only_visitors', False)
self.no_referrer_domains = self.iwla.getConfValue('no_referrer_domains', [])
return True return True
@ -125,11 +127,6 @@ class IWLAPreAnalysisRobots(IPlugin):
# 2) Less than 1 hit per page # 2) Less than 1 hit per page
if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]): if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]):
isRobot = True isRobot = True
for hit in super_hit['requests']:
if hit['server_name'] == 'indefero.soutade.fr':
if super_hit['viewed_hits'][0]*3 >= super_hit['viewed_pages'][0]:
isRobot = False
break
if isRobot: if isRobot:
self._setRobot(k, super_hit) self._setRobot(k, super_hit)
@ -148,7 +145,9 @@ class IWLAPreAnalysisRobots(IPlugin):
self._setRobot(k, super_hit) self._setRobot(k, super_hit)
break break
if int(hit['status']) >= 400 and int(hit['status']) <= 499: # Exception for favicon.png and all apple-*icon*
if int(hit['status']) >= 400 and int(hit['status']) <= 499 and\
'icon' not in hit['extract_request']['http_uri']:
error_codes += 1 error_codes += 1
elif int(hit['status']) in (304,): elif int(hit['status']) in (304,):
not_modified_pages += 1 not_modified_pages += 1
@ -161,7 +160,8 @@ class IWLAPreAnalysisRobots(IPlugin):
continue continue
# 6) No referer for hits
if super_hit['viewed_hits'][0] and not referers: if super_hit['viewed_hits'][0] and not referers and\
not super_hit['requests'][0]['server_name'] in self.no_referrer_domains:
self._setRobot(k, super_hit) self._setRobot(k, super_hit)
continue continue