diff --git a/ChangeLog b/ChangeLog
index 3fb03fa..8b5a4e5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-v0.3 (13/07/2015)
+v0.3 (20/12/2015)
 ** User **
 	Add referers_diff display plugin
 	Add year statistics in month details
@@ -21,6 +21,7 @@ v0.3 (13/07/2015)
 	Sort documentation output
 	Add debug traces in robots plugin
 	Update awstats data
+	Don't count 'uri' and 'uri/' as different URIs
 
 ** Bugs **
 	Forgot tag
diff --git a/iwla.py b/iwla.py
index 150f699..73d3d2c 100755
--- a/iwla.py
+++ b/iwla.py
@@ -148,6 +148,7 @@ class IWLA(object):
         self.log_re = re.compile(self.log_format_extracted)
         self.uri_re = re.compile(r'(?P<extract_uri>[^\?#]+)(\?(?P<extract_parameters>[^#]+))?(#.*)?')
         self.domain_name_re = re.compile(r'.*%s' % conf.domain_name)
+        self.normalize_uri_final_slashes = re.compile(r'/+$')
         self.plugins = [(conf.PRE_HOOK_DIRECTORY , conf.pre_analysis_hooks),
                         (conf.POST_HOOK_DIRECTORY , conf.post_analysis_hooks),
                         (conf.DISPLAY_HOOK_DIRECTORY , conf.display_hooks)]
@@ -334,6 +335,17 @@ class IWLA(object):
         super_hit['robot'] = False
         super_hit['hit_only'] = 0
 
+    def _normalizeURI(self, uri):
+        # 'uri' and 'uri/' must be counted as the same page, so strip
+        # trailing slashes; an all-slash URI ('/', '//') stays '/'
+        uri = self.normalize_uri_final_slashes.sub('', uri)
+        return uri or '/'
+
+    def _normalizeParameters(self, parameters):
+        # No parameters
+        if parameters == '?': return None
+        return parameters
+
     def _decodeHTTPRequest(self, hit):
         if not 'request' in hit.keys(): return False
 
@@ -344,9 +356,11 @@ class IWLA(object):
         uri_groups = self.uri_re.match(hit['extract_request']['http_uri'])
         if uri_groups:
             d = uri_groups.groupdict()
-            hit['extract_request']['extract_uri'] = d['extract_uri']
+            hit['extract_request']['extract_uri'] = self._normalizeURI(d['extract_uri'])
             if 'extract_parameters' in d.keys():
-                hit['extract_request']['extract_parameters'] = d['extract_parameters']
+                parameters = self._normalizeParameters(d['extract_parameters'])
+                if parameters:
+                    hit['extract_request']['extract_parameters'] = parameters
         else:
             self.logger.warning("Bad request extraction %s" % (hit['request']))
             return False
@@ -354,7 +368,10 @@ class IWLA(object):
         if hit['http_referer']:
             referer_groups = self.uri_re.match(hit['http_referer'])
             if referer_groups:
-                hit['extract_referer'] = referer_groups.groupdict()
+                # _normalizeURI() works on a string, not on the groups dict
+                referer = referer_groups.groupdict()
+                referer['extract_uri'] = self._normalizeURI(referer['extract_uri'])
+                hit['extract_referer'] = referer
         return True
 
     def _decodeTime(self, hit):