From 0c2ac431d129539d2885cd799a74e02bab0d9e30 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Thu, 3 Jun 2021 08:52:04 +0200 Subject: [PATCH] Be more strict with robots : require at least 1 hit per viewed page --- plugins/pre_analysis/robots.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/plugins/pre_analysis/robots.py b/plugins/pre_analysis/robots.py index 9dbeb62..af71d27 100644 --- a/plugins/pre_analysis/robots.py +++ b/plugins/pre_analysis/robots.py @@ -108,8 +108,8 @@ class IWLAPreAnalysisRobots(IPlugin): # super_hit['robot'] = 1 # continue -# 2) pages without hit --> robot - if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]: +# 2) Less than 1 hit per page + if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]): self._setRobot(k, super_hit) continue @@ -118,12 +118,6 @@ class IWLAPreAnalysisRobots(IPlugin): self._setRobot(k, super_hit) continue -# 4) pages without hit --> robot - if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]: - self.logger.debug(super_hit) - self._setRobot(k, super_hit) - continue - not_found_pages = 0 for hit in super_hit['requests']: # 5) /robots.txt read