Make backup before compressing (low memory servers)

Fix error: Call post hook plugins even in display only mode
Don't compute unordered hits (remove past hits if they are found after current)
Remove tags in stats diff
Don't do geolocalisation if visitor is not valid
Don't try to find search engine on robots
Update robot check rules
Add top_pages_diff plugin
This commit is contained in:
Gregory Soutade
2019-08-30 07:50:54 +02:00
parent ed6ed68706
commit bb268114b2
6 changed files with 131 additions and 36 deletions

View File

@@ -109,6 +109,16 @@ class IWLAPreAnalysisRobots(IPlugin):
# continue
# 2) pages without hit --> robot
if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]:
self._setRobot(k, super_hit)
continue
# 3) no pages and not hit --> robot
if not super_hit['viewed_hits'][0] and not super_hit['viewed_pages'][0]:
self._setRobot(k, super_hit)
continue
# 4) pages without hit --> robot
if not super_hit['viewed_hits'][0] and super_hit['viewed_pages'][0]:
self.logger.debug(super_hit)
self._setRobot(k, super_hit)
@@ -116,15 +126,15 @@ class IWLAPreAnalysisRobots(IPlugin):
not_found_pages = 0
for hit in super_hit['requests']:
# 3) /robots.txt read
# 5) /robots.txt read
if hit['extract_request']['http_uri'].endswith('/robots.txt'):
self._setRobot(k, super_hit)
break
if int(hit['status']) == 404:
if int(hit['status']) == 404 or int(hit['status']) == 403:
not_found_pages += 1
# 4) Any referer for hits
# 6) Any referer for hits
if not hit['is_page'] and hit['http_referer']:
referers += 1
@@ -132,7 +142,7 @@ class IWLAPreAnalysisRobots(IPlugin):
self._setRobot(k, super_hit)
continue
# 5) more than 10 404 pages
# 7) more than 10 404/403 pages
if not_found_pages > 10:
self._setRobot(k, super_hit)
continue