Add reset feature

Allow opening .gz files transparently
Improve debug output in robots.py
This commit is contained in:
Gregory Soutade
2015-05-22 07:51:11 +02:00
parent 86fc5f2189
commit 4cb3b21ca5
3 changed files with 51 additions and 7 deletions

View File

@@ -20,6 +20,7 @@
import re
import logging
import inspect
from iwla import IWLA
from iplugin import IPlugin
@@ -66,7 +67,11 @@ class IWLAPreAnalysisRobots(IPlugin):
return True
def _setRobot(self, k, super_hit):
self.logger.debug('%s is a robot' % (k))
callerframerecord = inspect.stack()[1]
frame = callerframerecord[0]
info = inspect.getframeinfo(frame)
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
super_hit['robot'] = 1
# Basic rule to detect robots
@@ -84,6 +89,7 @@ class IWLAPreAnalysisRobots(IPlugin):
if self.robot_re.match(first_page['http_user_agent']) or\
self.crawl_re.match(first_page['http_user_agent']):
self.logger.debug(first_page['http_user_agent'])
self._setRobot(k, super_hit)
continue
@@ -93,6 +99,7 @@ class IWLAPreAnalysisRobots(IPlugin):
break
if isRobot:
self.logger.debug(first_page['http_user_agent'])
self._setRobot(k, super_hit)
continue
@@ -103,6 +110,7 @@ class IWLAPreAnalysisRobots(IPlugin):
# 2) pages without hit --> robot
if not super_hit['viewed_hits']:
self.logger.debug(super_hit)
self._setRobot(k, super_hit)
continue