From 7b0ca661a12d512582c13225f9438d744a930c25 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Mon, 3 Feb 2025 08:00:25 +0100 Subject: [PATCH] Add rule for robot : forbid only "1 page and 1 hit" --- plugins/pre_analysis/robots.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index dbc1148..c9e7cf4 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -132,7 +132,10 @@ class IWLAPreAnalysisRobots(IPlugin): # 2) Less than 1 hit per page if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]): isRobot = True - +# 2.5) 1 page, 1 hit + elif super_hit['viewed_pages'][0] == 1 and super_hit['viewed_hits'][0] == 1: + isRobot = True + if isRobot: self._setRobot(k, super_hit) continue @@ -152,8 +155,7 @@ class IWLAPreAnalysisRobots(IPlugin): # Exception for favicon.png and all apple-*icon* if int(hit['status']) >= 400 and int(hit['status']) <= 499 and\ - 'icon' not in hit['extract_request']['http_uri'] and\ - hit['server_name'] != 'forge.soutade.fr': + 'icon' not in hit['extract_request']['http_uri']: error_codes += 1 elif int(hit['status']) in (304,): not_modified_pages += 1