From d78739157b72c9769fd9aa8b6392c723f2241f49 Mon Sep 17 00:00:00 2001 From: Gregory Soutade Date: Tue, 30 Jan 2024 11:28:10 +0100 Subject: [PATCH] Remove all trailing slashes of URL before starting analysis --- iwla.py | 9 +++++---- plugins/post_analysis/top_pages.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/iwla.py b/iwla.py index 9decddf..dde5ba2 100755 --- a/iwla.py +++ b/iwla.py @@ -391,14 +391,15 @@ class IWLA(object): super_hit['robot'] = None super_hit['hit_only'] = 0 - def _normalizeURI(self, uri, removeFileSlash=False): + def _normalizeURI(self, uri, removeFileSlash=True): if uri == '/': return uri # Remove protocol uri = self.protocol_re.sub('', uri) # Remove double / uri = self.slash_re.sub('/', uri) - if removeFileSlash and uri[-1] == '/': - uri = uri[:-1] + if removeFileSlash: + while len(uri) > 1 and uri[-1] == '/': + uri = uri[:-1] return uri def _normalizeParameters(self, parameters): @@ -429,7 +430,7 @@ class IWLA(object): referer_groups = self.uri_re.match(hit['http_referer']) if referer_groups: hit['extract_referer'] = referer_groups.groupdict("") - hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'], True) + hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri']) hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters']) hit['remote_ip'] = hit['remote_addr'] diff --git a/plugins/post_analysis/top_pages.py b/plugins/post_analysis/top_pages.py index 1e649d3..5d8d6bd 100644 --- a/plugins/post_analysis/top_pages.py +++ b/plugins/post_analysis/top_pages.py @@ -75,7 +75,7 @@ class IWLAPostAnalysisTopPages(IPlugin): uri = r['extract_request']['extract_uri'] if self.index_re.match(uri): - uri = '/' + uri = '' uri = "%s%s" % (r.get('server_name', ''), uri)