Remove all trailing slashes from URLs before starting analysis
This commit is contained in:
parent
d6d216db4d
commit
d78739157b
7
iwla.py
7
iwla.py
|
@ -391,13 +391,14 @@ class IWLA(object):
|
||||||
super_hit['robot'] = None
|
super_hit['robot'] = None
|
||||||
super_hit['hit_only'] = 0
|
super_hit['hit_only'] = 0
|
||||||
|
|
||||||
def _normalizeURI(self, uri, removeFileSlash=False):
|
def _normalizeURI(self, uri, removeFileSlash=True):
|
||||||
if uri == '/': return uri
|
if uri == '/': return uri
|
||||||
# Remove protocol
|
# Remove protocol
|
||||||
uri = self.protocol_re.sub('', uri)
|
uri = self.protocol_re.sub('', uri)
|
||||||
# Remove double /
|
# Remove double /
|
||||||
uri = self.slash_re.sub('/', uri)
|
uri = self.slash_re.sub('/', uri)
|
||||||
if removeFileSlash and uri[-1] == '/':
|
if removeFileSlash:
|
||||||
|
while len(uri) > 1 and uri[-1] == '/':
|
||||||
uri = uri[:-1]
|
uri = uri[:-1]
|
||||||
return uri
|
return uri
|
||||||
|
|
||||||
|
@ -429,7 +430,7 @@ class IWLA(object):
|
||||||
referer_groups = self.uri_re.match(hit['http_referer'])
|
referer_groups = self.uri_re.match(hit['http_referer'])
|
||||||
if referer_groups:
|
if referer_groups:
|
||||||
hit['extract_referer'] = referer_groups.groupdict("")
|
hit['extract_referer'] = referer_groups.groupdict("")
|
||||||
hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'], True)
|
hit['extract_referer']['extract_uri'] = self._normalizeURI(hit['extract_referer']['extract_uri'])
|
||||||
hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
|
hit['extract_referer']['extract_parameters'] = self._normalizeParameters(hit['extract_referer']['extract_parameters'])
|
||||||
|
|
||||||
hit['remote_ip'] = hit['remote_addr']
|
hit['remote_ip'] = hit['remote_addr']
|
||||||
|
|
|
@ -75,7 +75,7 @@ class IWLAPostAnalysisTopPages(IPlugin):
|
||||||
|
|
||||||
uri = r['extract_request']['extract_uri']
|
uri = r['extract_request']['extract_uri']
|
||||||
if self.index_re.match(uri):
|
if self.index_re.match(uri):
|
||||||
uri = '/'
|
uri = ''
|
||||||
|
|
||||||
uri = "%s%s" % (r.get('server_name', ''), uri)
|
uri = "%s%s" % (r.get('server_name', ''), uri)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user