Add reset feature
Allow opening .gz files transparently. Import debug in robots.py.
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
#
|
||||
|
||||
import re
|
||||
import logging
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
@@ -64,6 +65,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', [])
|
||||
self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps)
|
||||
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
return True
|
||||
|
||||
def hook(self):
|
||||
@@ -85,7 +87,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
# Page to hit
|
||||
for regexp in self.ph_regexps:
|
||||
if regexp.match(uri):
|
||||
#print '%s is a hit' % (uri )
|
||||
self.logger.debug('%s changed from page to hit' % (uri))
|
||||
request['is_page'] = False
|
||||
super_hit['viewed_pages'] -= 1
|
||||
super_hit['viewed_hits'] += 1
|
||||
@@ -94,7 +96,7 @@ class IWLAPreAnalysisPageToHit(IPlugin):
|
||||
# Hit to page
|
||||
for regexp in self.hp_regexps:
|
||||
if regexp.match(uri):
|
||||
#print '%s is a page' % (uri )
|
||||
self.logger.debug('%s changed from hit to page' % (uri))
|
||||
request['is_page'] = True
|
||||
super_hit['viewed_pages'] += 1
|
||||
super_hit['viewed_hits'] -= 1
|
||||
|
@@ -20,6 +20,7 @@
|
||||
|
||||
import re
|
||||
import logging
|
||||
import inspect
|
||||
|
||||
from iwla import IWLA
|
||||
from iplugin import IPlugin
|
||||
@@ -66,7 +67,11 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||
return True
|
||||
|
||||
def _setRobot(self, k, super_hit):
|
||||
self.logger.debug('%s is a robot' % (k))
|
||||
callerframerecord = inspect.stack()[1]
|
||||
frame = callerframerecord[0]
|
||||
info = inspect.getframeinfo(frame)
|
||||
|
||||
self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno))
|
||||
super_hit['robot'] = 1
|
||||
|
||||
# Basic rule to detect robots
|
||||
@@ -84,6 +89,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||
|
||||
if self.robot_re.match(first_page['http_user_agent']) or\
|
||||
self.crawl_re.match(first_page['http_user_agent']):
|
||||
self.logger.debug(first_page['http_user_agent'])
|
||||
self._setRobot(k, super_hit)
|
||||
continue
|
||||
|
||||
@@ -93,6 +99,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||
break
|
||||
|
||||
if isRobot:
|
||||
self.logger.debug(first_page['http_user_agent'])
|
||||
self._setRobot(k, super_hit)
|
||||
continue
|
||||
|
||||
@@ -103,6 +110,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||
|
||||
# 2) pages without hit --> robot
|
||||
if not super_hit['viewed_hits']:
|
||||
self.logger.debug(super_hit)
|
||||
self._setRobot(k, super_hit)
|
||||
continue
|
||||
|
||||
|
Reference in New Issue
Block a user