diff --git a/ChangeLog b/ChangeLog
index 3fb03fa..8b5a4e5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-v0.3 (13/07/2015)
+v0.3 (20/12/2015)
** User **
Add referers_diff display plugin
Add year statistics in month details
@@ -21,6 +21,7 @@ v0.3 (13/07/2015)
Sort documentation output
Add debug traces in robots plugin
Update awstats data
+ Don't count 'uri' and 'uri/' as different URIs
** Bugs **
Forgot
tag
diff --git a/iwla.py b/iwla.py
index 150f699..73d3d2c 100755
--- a/iwla.py
+++ b/iwla.py
@@ -148,6 +148,7 @@ class IWLA(object):
self.log_re = re.compile(self.log_format_extracted)
self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
+ self.normalize_uri_final_slashes = re.compile(r'/+$')
self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
(conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
(conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
@@ -334,6 +335,16 @@ class IWLA(object):
super_hit['robot'] = False
super_hit['hit_only'] = 0
+    def _normalizeURI(self, uri):
+        """Strip trailing slashes so 'uri' and 'uri/' count as the same URI."""
+        stripped = self.normalize_uri_final_slashes.sub('', uri)
+        return stripped if (stripped or not uri) else '/'  # '/', '//', ... stay '/' rather than ''
+
+    def _normalizeParameters(self, parameters):
+        """Return the query string unchanged, or None when it is just '?'."""
+        is_bare_question_mark = (parameters == '?')
+        return None if is_bare_question_mark else parameters
+
def _decodeHTTPRequest(self, hit):
if not 'request' in hit.keys(): return False
@@ -344,9 +355,11 @@ class IWLA(object):
uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
if uri_groups:
d = uri_groups.groupdict()
- hit['extract_request']['extract_uri'] = d['extract_uri']
+ hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
if 'extract_parameters' in d.keys():
- hit['extract_request']['extract_parameters'] = d['extract_parameters']
+ parameters = self._normalizeParameters(d['extract_parameters'])
+ if parameters:
+ hit['extract_request']['extract_parameters'] = parameters
else:
self.logger.warning("Bad request extraction %s" % (hit['request']))
return False
@@ -354,7 +367,7 @@ class IWLA(object):
if hit['http_referer']:
referer_groups = self.uri_re.match(hit['http_referer'])
if referer_groups:
- hit['extract_referer'] = referer_groups.groupdict()
+ hit['extract_referer'] = dict(referer_groups.groupdict(), extract_uri=self._normalizeURI(referer_groups.group('extract_uri')))  # groupdict() is a dict: normalize only its URI entry
return True
def _decodeTime(self, hit):