Make backup before compressing (low memory servers)
Fix error: call post hook plugins even in display-only mode
Don't compute unordered hits (remove past hits if they are found after the current one; see the sketch below)
Remove tags in stats diff
Don't do geolocalisation if the visitor is not valid
Don't try to find a search engine for robots
Update robot check rules
Add top_pages_diff plugin
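The unordered-hits change is only described in the message above and is not part of the diff below. As a rough illustration of the idea (dropping a hit whose timestamp is older than the newest one already processed), here is a minimal sketch; the names LogEntry and drop_out_of_order are placeholders for illustration, not the actual iwla API:

from dataclasses import dataclass
from datetime import datetime

@dataclass
class LogEntry:
    # Hypothetical stand-in for one parsed access-log hit
    time_decoded: datetime
    http_uri: str

def drop_out_of_order(entries):
    # Keep only entries whose timestamps never move backwards:
    # a "past" entry found after a more recent one is discarded,
    # mirroring "remove past hits if they are found after the current one".
    kept = []
    latest = None
    for entry in entries:
        if latest is not None and entry.time_decoded < latest:
            continue  # older than what was already processed -> drop it
        latest = entry.time_decoded
        kept.append(entry)
    return kept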
@@ -109,6 +109,16 @@ class IWLAPreAnalysisRobots(IPlugin):
             #     continue
 
+# 2) pages without hit --> robot
+            if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]:
+                self._setRobot(k, super_hit)
+                continue
+
+# 3) no pages and not hit --> robot
+            if not super_hit['viewed_hits'][0] and not super_hit['viewed_pages'][0]:
+                self._setRobot(k, super_hit)
+                continue
+
 # 4) pages without hit --> robot
             if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]:
                 self.logger.debug(super_hit)
                 self._setRobot(k, super_hit)
@@ -116,15 +126,15 @@ class IWLAPreAnalysisRobots(IPlugin):
 
             not_found_pages = 0
             for hit in super_hit['requests']:
-# 3) /robots.txt read
+# 5) /robots.txt read
                 if hit['extract_request']['http_uri'].endswith('/robots.txt'):
                     self._setRobot(k, super_hit)
                     break
 
-                if int(hit['status']) == 404:
+                if int(hit['status']) == 404 or int(hit['status']) == 403:
                     not_found_pages += 1
 
-# 4) Any referer for hits
+# 6) Any referer for hits
                 if not hit['is_page'] and hit['http_referer']:
                     referers += 1
 
@@ -132,7 +142,7 @@ class IWLAPreAnalysisRobots(IPlugin):
                 self._setRobot(k, super_hit)
                 continue
 
-# 5) more than 10 404 pages
+# 7) more than 10 404/403 pages
             if not_found_pages > 10:
                 self._setRobot(k, super_hit)
                 continue
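Read together, the checks visible in these hunks form a small heuristic chain. The sketch below restates that logic as a standalone function for readability; the simplified super_hit dictionary shape, the boolean return value, and the max_error_pages parameter are assumptions for illustration, not the plugin's real _setRobot bookkeeping:

def looks_like_robot(super_hit, max_error_pages=10):
    # super_hit is assumed to look like:
    #   {'viewed_hits': [int], 'viewed_pages': [int],
    #    'requests': [{'extract_request': {'http_uri': str},
    #                  'status': int, 'is_page': bool, 'http_referer': str}]}
    viewed_hits = super_hit['viewed_hits'][0]
    viewed_pages = super_hit['viewed_pages'][0]

    # 2)/4) pages were viewed but no plain hits (css/js/images) were fetched
    if not viewed_hits and viewed_pages:
        return True
    # 3) neither pages nor hits
    if not viewed_hits and not viewed_pages:
        return True

    not_found_pages = 0
    referers = 0
    for hit in super_hit['requests']:
        # 5) reading /robots.txt is treated as a strong robot signal
        if hit['extract_request']['http_uri'].endswith('/robots.txt'):
            return True
        # count error responses (404 and, after this commit, 403 as well)
        if int(hit['status']) in (404, 403):
            not_found_pages += 1
        # 6) count referers on non-page hits; the condition that consumes
        # this count lies outside the visible hunks and is not guessed here
        if not hit['is_page'] and hit['http_referer']:
            referers += 1

    # 7) more than 10 error (404/403) pages
    if not_found_pages > max_error_pages:
        return True
    return False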