Add reset feature
Allow opening .gz files transparently. Improve debug logging in robots.py.
This commit is contained in:
		| @@ -19,6 +19,7 @@ | ||||
| # | ||||
|  | ||||
| import re | ||||
| import logging | ||||
|  | ||||
| from iwla import IWLA | ||||
| from iplugin import IPlugin | ||||
| @@ -64,6 +65,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): | ||||
|         self.hp_regexps = self.iwla.getConfValue('hit_to_page_conf', []) | ||||
|         self.hp_regexps = map(lambda(r): re.compile(r), self.hp_regexps) | ||||
|  | ||||
|         self.logger = logging.getLogger(self.__class__.__name__) | ||||
|         return True | ||||
|  | ||||
|     def hook(self): | ||||
| @@ -85,7 +87,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): | ||||
|                     # Page to hit | ||||
|                     for regexp in self.ph_regexps: | ||||
|                         if regexp.match(uri): | ||||
|                             #print '%s is a hit' % (uri ) | ||||
|                             self.logger.debug('%s changed from page to hit' % (uri)) | ||||
|                             request['is_page'] = False | ||||
|                             super_hit['viewed_pages'] -= 1 | ||||
|                             super_hit['viewed_hits'] += 1 | ||||
| @@ -94,7 +96,7 @@ class IWLAPreAnalysisPageToHit(IPlugin): | ||||
|                     # Hit to page | ||||
|                     for regexp in self.hp_regexps: | ||||
|                         if regexp.match(uri): | ||||
|                             #print '%s is a page' % (uri ) | ||||
|                             self.logger.debug('%s changed from hit to page' % (uri)) | ||||
|                             request['is_page'] = True | ||||
|                             super_hit['viewed_pages'] += 1 | ||||
|                             super_hit['viewed_hits'] -= 1 | ||||
|   | ||||
| @@ -20,6 +20,7 @@ | ||||
|  | ||||
| import re | ||||
| import logging | ||||
| import inspect | ||||
|  | ||||
| from iwla import IWLA | ||||
| from iplugin import IPlugin | ||||
| @@ -66,7 +67,11 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
|         return True | ||||
|  | ||||
|     def _setRobot(self, k, super_hit): | ||||
|         self.logger.debug('%s is a robot' % (k)) | ||||
|         callerframerecord = inspect.stack()[1] | ||||
|         frame = callerframerecord[0] | ||||
|         info = inspect.getframeinfo(frame) | ||||
|  | ||||
|         self.logger.debug('%s is a robot (caller %s:%d)' % (k, info.function, info.lineno)) | ||||
|         super_hit['robot'] = 1 | ||||
|  | ||||
| # Basic rule to detect robots | ||||
| @@ -84,6 +89,7 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
|  | ||||
|             if self.robot_re.match(first_page['http_user_agent']) or\ | ||||
|                     self.crawl_re.match(first_page['http_user_agent']): | ||||
|                 self.logger.debug(first_page['http_user_agent']) | ||||
|                 self._setRobot(k, super_hit) | ||||
|                 continue | ||||
|  | ||||
| @@ -93,6 +99,7 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
|                     break | ||||
|  | ||||
|             if isRobot: | ||||
|                 self.logger.debug(first_page['http_user_agent']) | ||||
|                 self._setRobot(k, super_hit) | ||||
|                 continue | ||||
|  | ||||
| @@ -103,6 +110,7 @@ class IWLAPreAnalysisRobots(IPlugin): | ||||
|  | ||||
| # 2) pages without hit --> robot | ||||
|             if not super_hit['viewed_hits']: | ||||
|                 self.logger.debug(super_hit) | ||||
|                 self._setRobot(k, super_hit) | ||||
|                 continue | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user