Don't count 'uri' and 'uri/' as different URIs
This commit is contained in:
parent
0bc832f87c
commit
73e4b0d8a6
|
@ -1,4 +1,4 @@
|
||||||
v0.3 (13/07/2015)
|
v0.3 (20/12/2015)
|
||||||
** User **
|
** User **
|
||||||
Add referers_diff display plugin
|
Add referers_diff display plugin
|
||||||
Add year statistics in month details
|
Add year statistics in month details
|
||||||
|
@ -21,6 +21,7 @@ v0.3 (13/07/2015)
|
||||||
Sort documentation output
|
Sort documentation output
|
||||||
Add debug traces in robots plugin
|
Add debug traces in robots plugin
|
||||||
Update awstats data
|
Update awstats data
|
||||||
|
Don't count 'uri' and 'uri/' as different URIs
|
||||||
|
|
||||||
** Bugs **
|
** Bugs **
|
||||||
Forgot <body> tag
|
Forgot <body> tag
|
||||||
|
|
19
iwla.py
19
iwla.py
|
@ -148,6 +148,7 @@ class IWLA(object):
|
||||||
self.log_re = re.compile(self.log_format_extracted)
|
self.log_re = re.compile(self.log_format_extracted)
|
||||||
self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
|
self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
|
||||||
self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
|
self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
|
||||||
|
self.normalize_uri_final_slashes = re.compile(r'/+$')
|
||||||
self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
|
self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
|
||||||
(conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
|
(conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
|
||||||
(conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
|
(conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
|
||||||
|
@ -334,6 +335,16 @@ class IWLA(object):
|
||||||
super_hit['robot'] = False
|
super_hit['robot'] = False
|
||||||
super_hit['hit_only'] = 0
|
super_hit['hit_only'] = 0
|
||||||
|
|
||||||
|
def _normalizeURI(self, uri):
|
||||||
|
if uri == '/': return uri
|
||||||
|
uri = self.normalize_uri_final_slashes.sub('', uri)
|
||||||
|
return uri
|
||||||
|
|
||||||
|
def _normalizeParameters(self, parameters):
|
||||||
|
# No parameters
|
||||||
|
if parameters == '?': return None
|
||||||
|
return parameters
|
||||||
|
|
||||||
def _decodeHTTPRequest(self, hit):
|
def _decodeHTTPRequest(self, hit):
|
||||||
if not 'request' in hit.keys(): return False
|
if not 'request' in hit.keys(): return False
|
||||||
|
|
||||||
|
@ -344,9 +355,11 @@ class IWLA(object):
|
||||||
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
|
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
|
||||||
if uri_groups:
|
if uri_groups:
|
||||||
d = uri_groups.groupdict()
|
d = uri_groups.groupdict()
|
||||||
hit['extract_request']['extract_uri'] = d['extract_uri']
|
hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
|
||||||
if 'extract_parameters' in d.keys():
|
if 'extract_parameters' in d.keys():
|
||||||
hit['extract_request']['extract_parameters'] = d['extract_parameters']
|
parameters = self._normalizeParameters(d['extract_parameters'])
|
||||||
|
if parameters:
|
||||||
|
hit['extract_request']['extract_parameters'] = parameters
|
||||||
else:
|
else:
|
||||||
self.logger.warning("Bad request extraction %s" % (hit['request']))
|
self.logger.warning("Bad request extraction %s" % (hit['request']))
|
||||||
return False
|
return False
|
||||||
|
@ -354,7 +367,7 @@ class IWLA(object):
|
||||||
if hit['http_referer']:
|
if hit['http_referer']:
|
||||||
referer_groups = self.uri_re.match(hit['http_referer'])
|
referer_groups = self.uri_re.match(hit['http_referer'])
|
||||||
if referer_groups:
|
if referer_groups:
|
||||||
hit['extract_referer'] = referer_groups.groupdict()
|
hit['extract_referer'] = self._normalizeURI(referer_groups.groupdict())
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _decodeTime(self, hit):
|
def _decodeTime(self, hit):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user