Find the robot name in the user agent's 'compatible' string and group robots by that name

This commit is contained in:
Gregory Soutade
2023-01-28 09:38:59 +01:00
parent 9c57ad3ece
commit ac246eabe2
2 changed files with 26 additions and 13 deletions

View File

@@ -63,6 +63,7 @@ class IWLAPreAnalysisRobots(IPlugin):
self.awstats_robots = list(map(lambda x : re.compile(('.*%s.*') % (x), re.IGNORECASE), awstats_data.robots))
self.robot_re = re.compile(r'.*bot.*', re.IGNORECASE)
self.crawl_re = re.compile(r'.*crawl.*', re.IGNORECASE)
self.compatible_re = re.compile(r'.*\(.*compatible; (.*); \+.*\)*')
self.logger = logging.getLogger(self.__class__.__name__)
return True
@@ -75,6 +76,9 @@ class IWLAPreAnalysisRobots(IPlugin):
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
super_hit['robot'] = 1
super_hit['keep_requests'] = False
robot_name = self.compatible_re.match(super_hit['requests'][0]['http_user_agent'])
if robot_name:
super_hit['robot_name'] = robot_name[1]
# Basic rule to detect robots
def hook(self):