Compare commits
No commits in common. "9b32a81ddbfe91a5edb705d9052c5a6f65223d39" and "4d0b993aecc9b468353031776bc87b2f3bb07a43" have entirely different histories.
9b32a81ddb
...
4d0b993aec
|
@ -38,9 +38,6 @@ pages_extensions = ['/', 'htm', 'html', 'xhtml', 'py', 'pl', 'rb', 'php']
|
||||||
# HTTP codes that are considered OK
|
# HTTP codes that are considered OK
|
||||||
viewed_http_codes = [200, 304]
|
viewed_http_codes = [200, 304]
|
||||||
|
|
||||||
# URL to ignore
|
|
||||||
ignore_url = []
|
|
||||||
|
|
||||||
# If False, don't count visitors that don't GET a page but resources only (images, rss...)
|
# If False, don't count visitors that don't GET a page but resources only (images, rss...)
|
||||||
count_hit_only_visitors = False
|
count_hit_only_visitors = False
|
||||||
|
|
||||||
|
@ -76,6 +73,3 @@ no_referrer_domains = []
|
||||||
|
|
||||||
# Domains used by robots
|
# Domains used by robots
|
||||||
robot_domains = []
|
robot_domains = []
|
||||||
|
|
||||||
# Feeds agent identifier
|
|
||||||
feeds_agents = [r'.*NextCloud-News']
|
|
||||||
|
|
25
iwla.py
25
iwla.py
|
@ -51,13 +51,10 @@ Conf values needed :
|
||||||
analyzed_filename
|
analyzed_filename
|
||||||
domain_name
|
domain_name
|
||||||
locales_path
|
locales_path
|
||||||
locale
|
|
||||||
keep_requests*
|
|
||||||
compress_output_files
|
compress_output_files
|
||||||
excluded_ip
|
excluded_ip
|
||||||
excluded_domain_name
|
excluded_domain_name
|
||||||
reverse_dns_timeout*
|
reverse_dns_timeout*
|
||||||
ignore_url*
|
|
||||||
|
|
||||||
Output files :
|
Output files :
|
||||||
DB_ROOT/meta.db
|
DB_ROOT/meta.db
|
||||||
|
@ -168,9 +165,6 @@ class IWLA(object):
|
||||||
self.excluded_domain_name = []
|
self.excluded_domain_name = []
|
||||||
for domain_name in conf.excluded_domain_name:
|
for domain_name in conf.excluded_domain_name:
|
||||||
self.excluded_domain_name += [re.compile(domain_name)]
|
self.excluded_domain_name += [re.compile(domain_name)]
|
||||||
self.ignore_url = []
|
|
||||||
for url in conf.ignore_url:
|
|
||||||
self.ignore_url += [re.compile(url)]
|
|
||||||
self.multimedia_files_re = []
|
self.multimedia_files_re = []
|
||||||
for file_re in conf.multimedia_files_re:
|
for file_re in conf.multimedia_files_re:
|
||||||
self.multimedia_files_re += [re.compile(file_re)]
|
self.multimedia_files_re += [re.compile(file_re)]
|
||||||
|
@ -371,24 +365,18 @@ class IWLA(object):
|
||||||
return hit['robot'] == True
|
return hit['robot'] == True
|
||||||
|
|
||||||
def _appendHit(self, hit):
|
def _appendHit(self, hit):
|
||||||
# Redirected page/hit
|
remote_ip = hit['remote_ip']
|
||||||
if int(hit['status']) in (301, 302, 307, 308):
|
|
||||||
return
|
|
||||||
|
|
||||||
remote_ip = hit['remote_ip']
|
|
||||||
if not remote_ip: return
|
if not remote_ip: return
|
||||||
|
|
||||||
for ip in self.excluded_ip:
|
for ip in self.excluded_ip:
|
||||||
if ip.match(remote_ip):
|
if ip.match(remote_ip):
|
||||||
return
|
return
|
||||||
|
|
||||||
request = hit['extract_request']
|
# Redirected page/hit
|
||||||
uri = request.get('extract_uri', request['http_uri'])
|
if int(hit['status']) in (301, 302, 307, 308):
|
||||||
|
return
|
||||||
|
|
||||||
for url in self.ignore_url:
|
|
||||||
if url.match(uri):
|
|
||||||
return
|
|
||||||
|
|
||||||
if not remote_ip in self.current_analysis['visits'].keys():
|
if not remote_ip in self.current_analysis['visits'].keys():
|
||||||
self._createVisitor(hit)
|
self._createVisitor(hit)
|
||||||
|
|
||||||
|
@ -403,6 +391,9 @@ class IWLA(object):
|
||||||
super_hit['bandwidth'][0] += int(hit['body_bytes_sent'])
|
super_hit['bandwidth'][0] += int(hit['body_bytes_sent'])
|
||||||
super_hit['last_access'] = self.meta_infos['last_time']
|
super_hit['last_access'] = self.meta_infos['last_time']
|
||||||
|
|
||||||
|
request = hit['extract_request']
|
||||||
|
uri = request.get('extract_uri', request['http_uri'])
|
||||||
|
|
||||||
hit['is_page'] = self.isPage(uri)
|
hit['is_page'] = self.isPage(uri)
|
||||||
|
|
||||||
if super_hit['robot'] or\
|
if super_hit['robot'] or\
|
||||||
|
|
|
@ -132,10 +132,7 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
# 2) Less than 1 hit per page
|
# 2) Less than 1 hit per page
|
||||||
if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]):
|
if super_hit['viewed_pages'][0] and (super_hit['viewed_hits'][0] < super_hit['viewed_pages'][0]):
|
||||||
isRobot = True
|
isRobot = True
|
||||||
# 2.5) 1 page, 1 hit
|
|
||||||
elif super_hit['viewed_pages'][0] == 1 and super_hit['viewed_hits'][0] == 1:
|
|
||||||
isRobot = True
|
|
||||||
|
|
||||||
if isRobot:
|
if isRobot:
|
||||||
self._setRobot(k, super_hit)
|
self._setRobot(k, super_hit)
|
||||||
continue
|
continue
|
||||||
|
@ -155,7 +152,8 @@ class IWLAPreAnalysisRobots(IPlugin):
|
||||||
|
|
||||||
# Exception for favicon.png and all apple-*icon*
|
# Exception for favicon.png and all apple-*icon*
|
||||||
if int(hit['status']) >= 400 and int(hit['status']) <= 499 and\
|
if int(hit['status']) >= 400 and int(hit['status']) <= 499 and\
|
||||||
'icon' not in hit['extract_request']['http_uri']:
|
'icon' not in hit['extract_request']['http_uri'] and\
|
||||||
|
hit['server_name'] != 'forge.soutade.fr':
|
||||||
error_codes += 1
|
error_codes += 1
|
||||||
elif int(hit['status']) in (304,):
|
elif int(hit['status']) in (304,):
|
||||||
not_modified_pages += 1
|
not_modified_pages += 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user